zeph_tools/
config.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::permissions::{AutonomyLevel, PermissionPolicy, PermissionsConfig};
7use crate::policy::{PolicyConfig, PolicyRuleConfig};
8
/// Serde default helper for boolean fields that must be `true` when omitted.
fn default_true() -> bool {
    true
}

/// Default for `AdversarialPolicyConfig::timeout_ms`, in milliseconds.
fn default_adversarial_timeout_ms() -> u64 {
    const POLICY_TIMEOUT_MS: u64 = 3_000;
    POLICY_TIMEOUT_MS
}

/// Default for `ShellConfig::timeout`.
fn default_timeout() -> u64 {
    const SHELL_TIMEOUT: u64 = 30;
    SHELL_TIMEOUT
}

/// Default for `ResultCacheConfig::ttl_secs`, in seconds.
fn default_cache_ttl_secs() -> u64 {
    const CACHE_TTL_SECS: u64 = 300;
    CACHE_TTL_SECS
}
23
/// Default for `ShellConfig::confirm_patterns`.
///
/// The list contains destructive commands (`rm`, forced `git push`, SQL drop/truncate)
/// plus shell substitution and redirection constructs and `eval`. Trailing spaces in
/// entries such as `"rm "` are part of the pattern.
fn default_confirm_patterns() -> Vec<String> {
    const PATTERNS: [&str; 12] = [
        "rm ",
        "git push -f",
        "git push --force",
        "drop table",
        "drop database",
        "truncate ",
        "$(",
        "`",
        "<(",
        ">(",
        "<<<",
        "eval ",
    ];
    PATTERNS
        .iter()
        .map(|pattern| (*pattern).to_owned())
        .collect()
}
40
/// Default for `AuditConfig::destination`.
fn default_audit_destination() -> String {
    String::from("stdout")
}

/// Default for `OverflowConfig::threshold`.
fn default_overflow_threshold() -> usize {
    const OVERFLOW_THRESHOLD: usize = 50_000;
    OVERFLOW_THRESHOLD
}

/// Default for `OverflowConfig::retention_days`.
fn default_retention_days() -> u64 {
    const RETENTION_DAYS: u64 = 7;
    RETENTION_DAYS
}

/// Default for `OverflowConfig::max_overflow_bytes`: 10 MiB.
fn default_max_overflow_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    10 * MIB
}
56
57/// Configuration for large tool response offload to `SQLite`.
58#[derive(Debug, Clone, Deserialize, Serialize)]
59pub struct OverflowConfig {
60    #[serde(default = "default_overflow_threshold")]
61    pub threshold: usize,
62    #[serde(default = "default_retention_days")]
63    pub retention_days: u64,
64    /// Maximum bytes per overflow entry. `0` means unlimited.
65    #[serde(default = "default_max_overflow_bytes")]
66    pub max_overflow_bytes: usize,
67}
68
69impl Default for OverflowConfig {
70    fn default() -> Self {
71        Self {
72            threshold: default_overflow_threshold(),
73            retention_days: default_retention_days(),
74            max_overflow_bytes: default_max_overflow_bytes(),
75        }
76    }
77}
78
/// Default for `AnomalyConfig::window_size`.
fn default_anomaly_window() -> usize {
    const WINDOW_SIZE: usize = 10;
    WINDOW_SIZE
}

/// Default for `AnomalyConfig::error_threshold`.
fn default_anomaly_error_threshold() -> f64 {
    const ERROR_THRESHOLD: f64 = 0.5;
    ERROR_THRESHOLD
}

/// Default for `AnomalyConfig::critical_threshold`.
fn default_anomaly_critical_threshold() -> f64 {
    const CRITICAL_THRESHOLD: f64 = 0.8;
    CRITICAL_THRESHOLD
}
90
91/// Configuration for the sliding-window anomaly detector.
92#[derive(Debug, Clone, Deserialize, Serialize)]
93pub struct AnomalyConfig {
94    #[serde(default = "default_true")]
95    pub enabled: bool,
96    #[serde(default = "default_anomaly_window")]
97    pub window_size: usize,
98    #[serde(default = "default_anomaly_error_threshold")]
99    pub error_threshold: f64,
100    #[serde(default = "default_anomaly_critical_threshold")]
101    pub critical_threshold: f64,
102    /// Emit a WARN log when a reasoning-enhanced model (o1, o3, `QwQ`, etc.) produces
103    /// a quality failure (`ToolNotFound`, `InvalidParameters`, `TypeMismatch`). Default: `true`.
104    ///
105    /// Based on arXiv:2510.22977 — CoT/RL reasoning amplifies tool hallucination.
106    #[serde(default = "default_true")]
107    pub reasoning_model_warning: bool,
108}
109
110impl Default for AnomalyConfig {
111    fn default() -> Self {
112        Self {
113            enabled: true,
114            window_size: default_anomaly_window(),
115            error_threshold: default_anomaly_error_threshold(),
116            critical_threshold: default_anomaly_critical_threshold(),
117            reasoning_model_warning: true,
118        }
119    }
120}
121
122/// Configuration for the tool result cache.
123#[derive(Debug, Clone, Deserialize, Serialize)]
124pub struct ResultCacheConfig {
125    /// Whether caching is enabled. Default: `true`.
126    #[serde(default = "default_true")]
127    pub enabled: bool,
128    /// Time-to-live in seconds. `0` means entries never expire. Default: `300`.
129    #[serde(default = "default_cache_ttl_secs")]
130    pub ttl_secs: u64,
131}
132
133impl Default for ResultCacheConfig {
134    fn default() -> Self {
135        Self {
136            enabled: true,
137            ttl_secs: default_cache_ttl_secs(),
138        }
139    }
140}
141
/// Default for `TafcConfig::complexity_threshold`.
fn default_tafc_complexity_threshold() -> f64 {
    const COMPLEXITY_THRESHOLD: f64 = 0.6;
    COMPLEXITY_THRESHOLD
}
145
146/// Configuration for Think-Augmented Function Calling (TAFC).
147#[derive(Debug, Clone, Deserialize, Serialize)]
148pub struct TafcConfig {
149    /// Enable TAFC schema augmentation (default: false).
150    #[serde(default)]
151    pub enabled: bool,
152    /// Complexity threshold tau in [0.0, 1.0]; tools with complexity >= tau are augmented.
153    /// Default: 0.6
154    #[serde(default = "default_tafc_complexity_threshold")]
155    pub complexity_threshold: f64,
156}
157
158impl Default for TafcConfig {
159    fn default() -> Self {
160        Self {
161            enabled: false,
162            complexity_threshold: default_tafc_complexity_threshold(),
163        }
164    }
165}
166
167impl TafcConfig {
168    /// Validate and clamp `complexity_threshold` to \[0.0, 1.0\]. Reset NaN/Infinity to 0.6.
169    #[must_use]
170    pub fn validated(mut self) -> Self {
171        if self.complexity_threshold.is_finite() {
172            self.complexity_threshold = self.complexity_threshold.clamp(0.0, 1.0);
173        } else {
174            self.complexity_threshold = 0.6;
175        }
176        self
177    }
178}
179
/// Default for `UtilityScoringConfig::exempt_tools`.
fn default_utility_exempt_tools() -> Vec<String> {
    const EXEMPT: [&str; 2] = ["invoke_skill", "load_skill"];
    EXEMPT.iter().map(|tool| (*tool).to_owned()).collect()
}

/// Default for `UtilityScoringConfig::threshold`.
fn default_utility_threshold() -> f32 {
    const THRESHOLD: f32 = 0.1;
    THRESHOLD
}

/// Default for `UtilityScoringConfig::gain_weight`.
fn default_utility_gain_weight() -> f32 {
    const GAIN_WEIGHT: f32 = 1.0;
    GAIN_WEIGHT
}

/// Default for `UtilityScoringConfig::cost_weight`.
fn default_utility_cost_weight() -> f32 {
    const COST_WEIGHT: f32 = 0.5;
    COST_WEIGHT
}

/// Default for `UtilityScoringConfig::redundancy_weight`.
fn default_utility_redundancy_weight() -> f32 {
    const REDUNDANCY_WEIGHT: f32 = 0.3;
    REDUNDANCY_WEIGHT
}

/// Default for `UtilityScoringConfig::uncertainty_bonus`.
fn default_utility_uncertainty_bonus() -> f32 {
    const UNCERTAINTY_BONUS: f32 = 0.2;
    UNCERTAINTY_BONUS
}
203
204/// Configuration for utility-guided tool dispatch (`[tools.utility]` TOML section).
205///
206/// Implements the utility gate from arXiv:2603.19896: each tool call is scored
207/// `U = gain_weight*gain - cost_weight*cost - redundancy_weight*redundancy + uncertainty_bonus*uncertainty`.
208/// Calls with `U < threshold` are skipped (fail-closed on scoring errors).
209#[derive(Debug, Clone, Deserialize, Serialize)]
210#[serde(default)]
211pub struct UtilityScoringConfig {
212    /// Enable utility-guided gating. Default: false (opt-in).
213    pub enabled: bool,
214    /// Minimum utility score required to execute a tool call. Default: 0.1.
215    #[serde(default = "default_utility_threshold")]
216    pub threshold: f32,
217    /// Weight for the estimated gain component. Must be >= 0. Default: 1.0.
218    #[serde(default = "default_utility_gain_weight")]
219    pub gain_weight: f32,
220    /// Weight for the step cost component. Must be >= 0. Default: 0.5.
221    #[serde(default = "default_utility_cost_weight")]
222    pub cost_weight: f32,
223    /// Weight for the redundancy penalty. Must be >= 0. Default: 0.3.
224    #[serde(default = "default_utility_redundancy_weight")]
225    pub redundancy_weight: f32,
226    /// Weight for the exploration bonus. Must be >= 0. Default: 0.2.
227    #[serde(default = "default_utility_uncertainty_bonus")]
228    pub uncertainty_bonus: f32,
229    /// Tool names that bypass the utility gate unconditionally (case-insensitive).
230    /// Auto-populated with file-read tools when `MagicDocs` is enabled. User-specified
231    /// entries are preserved and merged additively with any auto-populated names.
232    #[serde(default = "default_utility_exempt_tools")]
233    pub exempt_tools: Vec<String>,
234}
235
236impl Default for UtilityScoringConfig {
237    fn default() -> Self {
238        Self {
239            enabled: false,
240            threshold: default_utility_threshold(),
241            gain_weight: default_utility_gain_weight(),
242            cost_weight: default_utility_cost_weight(),
243            redundancy_weight: default_utility_redundancy_weight(),
244            uncertainty_bonus: default_utility_uncertainty_bonus(),
245            exempt_tools: default_utility_exempt_tools(),
246        }
247    }
248}
249
250impl UtilityScoringConfig {
251    /// Validate that all weights and threshold are non-negative and finite.
252    ///
253    /// # Errors
254    ///
255    /// Returns a description of the first invalid field found.
256    pub fn validate(&self) -> Result<(), String> {
257        let fields = [
258            ("threshold", self.threshold),
259            ("gain_weight", self.gain_weight),
260            ("cost_weight", self.cost_weight),
261            ("redundancy_weight", self.redundancy_weight),
262            ("uncertainty_bonus", self.uncertainty_bonus),
263        ];
264        for (name, val) in fields {
265            if !val.is_finite() {
266                return Err(format!("[tools.utility] {name} must be finite, got {val}"));
267            }
268            if val < 0.0 {
269                return Err(format!("[tools.utility] {name} must be >= 0, got {val}"));
270            }
271        }
272        Ok(())
273    }
274}
275
/// Default for `DependencyConfig::boost_per_dep`.
fn default_boost_per_dep() -> f32 {
    const BOOST_PER_DEP: f32 = 0.15;
    BOOST_PER_DEP
}

/// Default for `DependencyConfig::max_total_boost`.
fn default_max_total_boost() -> f32 {
    const MAX_TOTAL_BOOST: f32 = 0.2;
    MAX_TOTAL_BOOST
}
283
284/// Dependency specification for a single tool.
285#[derive(Debug, Clone, Default, Deserialize, Serialize)]
286pub struct ToolDependency {
287    /// Hard prerequisites: tool is hidden until ALL of these have completed successfully.
288    #[serde(default, skip_serializing_if = "Vec::is_empty")]
289    pub requires: Vec<String>,
290    /// Soft prerequisites: tool gets a similarity boost when these have completed.
291    #[serde(default, skip_serializing_if = "Vec::is_empty")]
292    pub prefers: Vec<String>,
293}
294
295/// Configuration for the tool dependency graph feature.
296#[derive(Debug, Clone, Deserialize, Serialize)]
297pub struct DependencyConfig {
298    /// Whether dependency gating is enabled. Default: false.
299    #[serde(default)]
300    pub enabled: bool,
301    /// Similarity boost added per satisfied `prefers` dependency. Default: 0.15.
302    #[serde(default = "default_boost_per_dep")]
303    pub boost_per_dep: f32,
304    /// Maximum total boost applied regardless of how many `prefers` deps are met. Default: 0.2.
305    #[serde(default = "default_max_total_boost")]
306    pub max_total_boost: f32,
307    /// Per-tool dependency rules. Key is `tool_id`.
308    #[serde(default)]
309    pub rules: std::collections::HashMap<String, ToolDependency>,
310}
311
312impl Default for DependencyConfig {
313    fn default() -> Self {
314        Self {
315            enabled: false,
316            boost_per_dep: default_boost_per_dep(),
317            max_total_boost: default_max_total_boost(),
318            rules: std::collections::HashMap::new(),
319        }
320    }
321}
322
/// Default for `RetryConfig::max_attempts`.
fn default_retry_max_attempts() -> usize {
    const MAX_ATTEMPTS: usize = 2;
    MAX_ATTEMPTS
}

/// Default for `RetryConfig::base_ms`, in milliseconds.
fn default_retry_base_ms() -> u64 {
    const BASE_MS: u64 = 500;
    BASE_MS
}

/// Default for `RetryConfig::max_ms`, in milliseconds.
fn default_retry_max_ms() -> u64 {
    const MAX_MS: u64 = 5_000;
    MAX_MS
}

/// Default for `RetryConfig::budget_secs`, in seconds.
fn default_retry_budget_secs() -> u64 {
    const BUDGET_SECS: u64 = 30;
    BUDGET_SECS
}
338
339/// Configuration for tool error retry behavior.
340#[derive(Debug, Clone, Deserialize, Serialize)]
341pub struct RetryConfig {
342    /// Maximum retry attempts for transient errors per tool call. 0 = disabled.
343    #[serde(default = "default_retry_max_attempts")]
344    pub max_attempts: usize,
345    /// Base delay (ms) for exponential backoff.
346    #[serde(default = "default_retry_base_ms")]
347    pub base_ms: u64,
348    /// Maximum delay cap (ms) for exponential backoff.
349    #[serde(default = "default_retry_max_ms")]
350    pub max_ms: u64,
351    /// Maximum wall-clock time (seconds) for all retries of a single tool call. 0 = unlimited.
352    #[serde(default = "default_retry_budget_secs")]
353    pub budget_secs: u64,
354    /// Provider name from `[[llm.providers]]` for LLM-based parameter reformatting on
355    /// `InvalidParameters`/`TypeMismatch` errors. Empty string = disabled.
356    #[serde(default)]
357    pub parameter_reformat_provider: String,
358}
359
360impl Default for RetryConfig {
361    fn default() -> Self {
362        Self {
363            max_attempts: default_retry_max_attempts(),
364            base_ms: default_retry_base_ms(),
365            max_ms: default_retry_max_ms(),
366            budget_secs: default_retry_budget_secs(),
367            parameter_reformat_provider: String::new(),
368        }
369    }
370}
371
372/// Configuration for the LLM-based adversarial policy agent.
373#[derive(Debug, Clone, Deserialize, Serialize)]
374pub struct AdversarialPolicyConfig {
375    /// Enable the adversarial policy agent. Default: `false`.
376    #[serde(default)]
377    pub enabled: bool,
378    /// Provider name from `[[llm.providers]]` for the policy validation LLM.
379    /// Should reference a fast, cheap model (e.g. `gpt-4o-mini`).
380    /// Empty string = fall back to the default provider.
381    #[serde(default)]
382    pub policy_provider: String,
383    /// Path to a plain-text policy file. Each non-empty, non-comment line is one policy.
384    pub policy_file: Option<String>,
385    /// Whether to allow tool calls when the policy LLM fails (timeout/error).
386    /// Default: `false` (fail-closed / deny on error).
387    ///
388    /// Setting this to `true` trades security for availability. Use only in
389    /// deployments where the declarative `PolicyEnforcer` already covers hard rules.
390    #[serde(default)]
391    pub fail_open: bool,
392    /// Timeout in milliseconds for a single policy LLM call. Default: 3000.
393    #[serde(default = "default_adversarial_timeout_ms")]
394    pub timeout_ms: u64,
395    /// Tool names that are always allowed through the adversarial policy gate,
396    /// regardless of policy content. Covers internal agent operations that are
397    /// not externally visible side effects.
398    #[serde(default = "AdversarialPolicyConfig::default_exempt_tools")]
399    pub exempt_tools: Vec<String>,
400}
401impl Default for AdversarialPolicyConfig {
402    fn default() -> Self {
403        Self {
404            enabled: false,
405            policy_provider: String::new(),
406            policy_file: None,
407            fail_open: false,
408            timeout_ms: default_adversarial_timeout_ms(),
409            exempt_tools: Self::default_exempt_tools(),
410        }
411    }
412}
413impl AdversarialPolicyConfig {
414    fn default_exempt_tools() -> Vec<String> {
415        vec![
416            "memory_save".into(),
417            "memory_search".into(),
418            "read_overflow".into(),
419            "load_skill".into(),
420            "invoke_skill".into(),
421            "schedule_deferred".into(),
422        ]
423    }
424}
425
426/// Per-path read allow/deny sandbox for the file tool.
427///
428/// Evaluation order: deny-then-allow. If a path matches `deny_read` and does NOT
429/// match `allow_read`, access is denied. Empty `deny_read` means no read restrictions.
430///
431/// All patterns are matched against the canonicalized (absolute, symlink-resolved) path.
432#[derive(Debug, Clone, Default, Deserialize, Serialize)]
433pub struct FileConfig {
434    /// Glob patterns for paths denied for reading. Evaluated first.
435    #[serde(default)]
436    pub deny_read: Vec<String>,
437    /// Glob patterns for paths allowed for reading. Evaluated second (overrides deny).
438    #[serde(default)]
439    pub allow_read: Vec<String>,
440}
441
442/// Top-level configuration for tool execution.
443#[derive(Debug, Deserialize, Serialize)]
444pub struct ToolsConfig {
445    #[serde(default = "default_true")]
446    pub enabled: bool,
447    #[serde(default = "default_true")]
448    pub summarize_output: bool,
449    #[serde(default)]
450    pub shell: ShellConfig,
451    #[serde(default)]
452    pub scrape: ScrapeConfig,
453    #[serde(default)]
454    pub audit: AuditConfig,
455    #[serde(default)]
456    pub permissions: Option<PermissionsConfig>,
457    #[serde(default)]
458    pub filters: crate::filter::FilterConfig,
459    #[serde(default)]
460    pub overflow: OverflowConfig,
461    #[serde(default)]
462    pub anomaly: AnomalyConfig,
463    #[serde(default)]
464    pub result_cache: ResultCacheConfig,
465    #[serde(default)]
466    pub tafc: TafcConfig,
467    #[serde(default)]
468    pub dependencies: DependencyConfig,
469    #[serde(default)]
470    pub retry: RetryConfig,
471    /// Declarative policy compiler for tool call authorization.
472    #[serde(default)]
473    pub policy: PolicyConfig,
474    /// LLM-based adversarial policy agent for natural-language policy enforcement.
475    #[serde(default)]
476    pub adversarial_policy: AdversarialPolicyConfig,
477    /// Utility-guided tool dispatch gate.
478    #[serde(default)]
479    pub utility: UtilityScoringConfig,
480    /// Per-path read allow/deny sandbox for the file tool.
481    #[serde(default)]
482    pub file: FileConfig,
483    /// OAP declarative pre-action authorization. Rules are merged into `PolicyEnforcer` at
484    /// startup. Authorization rules are appended after `policy.rules` — policy rules take
485    /// precedence (first-match-wins semantics). This means existing policy allow/deny rules
486    /// are evaluated before authorization rules.
487    #[serde(default)]
488    pub authorization: AuthorizationConfig,
489    /// Maximum tool calls allowed per agent session. `None` = unlimited (default).
490    /// Counted on the first attempt only — retries do not consume additional quota slots.
491    #[serde(default)]
492    pub max_tool_calls_per_session: Option<u32>,
493    /// Speculative tool execution configuration.
494    ///
495    /// Runtime-only; no cargo feature gate. Default mode is `off`.
496    #[serde(default)]
497    pub speculative: SpeculativeConfig,
498    /// OS-level subprocess sandbox configuration (`[tools.sandbox]` TOML section).
499    ///
500    /// When `enabled = true`, all shell commands are wrapped in an OS-native sandbox
501    /// (macOS Seatbelt or Linux bwrap + Landlock). Default: disabled.
502    #[serde(default)]
503    pub sandbox: SandboxConfig,
504    /// Egress network event logging configuration.
505    #[serde(default)]
506    pub egress: EgressConfig,
507}
508
509impl ToolsConfig {
510    /// Build a `PermissionPolicy` from explicit config or legacy shell fields.
511    #[must_use]
512    pub fn permission_policy(&self, autonomy_level: AutonomyLevel) -> PermissionPolicy {
513        let policy = if let Some(ref perms) = self.permissions {
514            PermissionPolicy::from(perms.clone())
515        } else {
516            PermissionPolicy::from_legacy(
517                &self.shell.blocked_commands,
518                &self.shell.confirm_patterns,
519            )
520        };
521        policy.with_autonomy(autonomy_level)
522    }
523}
524
525/// Shell-specific configuration: timeout, command blocklist, and allowlist overrides.
526#[derive(Debug, Deserialize, Serialize)]
527#[allow(clippy::struct_excessive_bools)]
528pub struct ShellConfig {
529    #[serde(default = "default_timeout")]
530    pub timeout: u64,
531    #[serde(default)]
532    pub blocked_commands: Vec<String>,
533    #[serde(default)]
534    pub allowed_commands: Vec<String>,
535    #[serde(default)]
536    pub allowed_paths: Vec<String>,
537    #[serde(default = "default_true")]
538    pub allow_network: bool,
539    #[serde(default = "default_confirm_patterns")]
540    pub confirm_patterns: Vec<String>,
541    /// Environment variable name prefixes to strip from subprocess environment.
542    /// Variables whose names start with any of these prefixes are removed before
543    /// spawning shell commands. Default covers common credential naming conventions.
544    #[serde(default = "ShellConfig::default_env_blocklist")]
545    pub env_blocklist: Vec<String>,
546    /// Enable transactional mode: snapshot files before write commands, rollback on failure.
547    #[serde(default)]
548    pub transactional: bool,
549    /// Glob patterns defining which paths are eligible for snapshotting.
550    /// Only files matching these patterns (relative to cwd) are captured.
551    /// Empty = snapshot all files referenced in the command.
552    #[serde(default)]
553    pub transaction_scope: Vec<String>,
554    /// Automatically rollback when exit code >= 2. Default: false.
555    /// Exit code 1 is excluded because many tools (grep, diff, test) use it for
556    /// non-error conditions.
557    #[serde(default)]
558    pub auto_rollback: bool,
559    /// Exit codes that trigger auto-rollback. Default: empty (uses >= 2 heuristic).
560    /// When non-empty, only these exact exit codes trigger rollback.
561    #[serde(default)]
562    pub auto_rollback_exit_codes: Vec<i32>,
563    /// When true, snapshot failure aborts execution with an error.
564    /// When false (default), snapshot failure emits a warning and execution proceeds.
565    #[serde(default)]
566    pub snapshot_required: bool,
567    /// Maximum cumulative bytes for transaction snapshots. 0 = unlimited.
568    #[serde(default)]
569    pub max_snapshot_bytes: u64,
570}
571
572impl ShellConfig {
573    #[must_use]
574    pub fn default_env_blocklist() -> Vec<String> {
575        vec![
576            "ZEPH_".into(),
577            "AWS_".into(),
578            "AZURE_".into(),
579            "GCP_".into(),
580            "GOOGLE_".into(),
581            "OPENAI_".into(),
582            "ANTHROPIC_".into(),
583            "HF_".into(),
584            "HUGGING".into(),
585        ]
586    }
587}
588
589/// Configuration for audit logging of tool executions.
590#[derive(Debug, Deserialize, Serialize)]
591pub struct AuditConfig {
592    #[serde(default = "default_true")]
593    pub enabled: bool,
594    #[serde(default = "default_audit_destination")]
595    pub destination: String,
596    /// When true, log a per-tool risk summary at startup.
597    /// Each entry includes: tool name, privilege level, and expected input sanitization.
598    /// This is a design-time risk inventory, NOT runtime static analysis or a guarantee
599    /// that sanitization is functioning correctly.
600    #[serde(default)]
601    pub tool_risk_summary: bool,
602}
603
604impl Default for ToolsConfig {
605    fn default() -> Self {
606        Self {
607            enabled: true,
608            summarize_output: true,
609            shell: ShellConfig::default(),
610            scrape: ScrapeConfig::default(),
611            audit: AuditConfig::default(),
612            permissions: None,
613            filters: crate::filter::FilterConfig::default(),
614            overflow: OverflowConfig::default(),
615            anomaly: AnomalyConfig::default(),
616            result_cache: ResultCacheConfig::default(),
617            tafc: TafcConfig::default(),
618            dependencies: DependencyConfig::default(),
619            retry: RetryConfig::default(),
620            policy: PolicyConfig::default(),
621            adversarial_policy: AdversarialPolicyConfig::default(),
622            utility: UtilityScoringConfig::default(),
623            file: FileConfig::default(),
624            authorization: AuthorizationConfig::default(),
625            max_tool_calls_per_session: None,
626            speculative: SpeculativeConfig::default(),
627            sandbox: SandboxConfig::default(),
628            egress: EgressConfig::default(),
629        }
630    }
631}
632
/// Default for `SpeculativeConfig::max_in_flight`.
fn default_max_in_flight() -> usize {
    const MAX_IN_FLIGHT: usize = 4;
    MAX_IN_FLIGHT
}

/// Default for `SpeculativeConfig::confidence_threshold`.
fn default_confidence_threshold() -> f32 {
    const CONFIDENCE_THRESHOLD: f32 = 0.55;
    CONFIDENCE_THRESHOLD
}

/// Default for `SpeculativeConfig::max_wasted_per_minute`.
fn default_max_wasted_per_minute() -> u64 {
    const MAX_WASTED_PER_MINUTE: u64 = 100;
    MAX_WASTED_PER_MINUTE
}

/// Default for `SpeculativeConfig::ttl_seconds`, in seconds.
fn default_ttl_seconds() -> u64 {
    const TTL_SECONDS: u64 = 30;
    TTL_SECONDS
}

/// Default for `SpeculativePatternConfig::min_observations`.
fn default_min_observations() -> u32 {
    const MIN_OBSERVATIONS: u32 = 5;
    MIN_OBSERVATIONS
}

/// Default for `SpeculativePatternConfig::half_life_days`, in days.
fn default_half_life_days() -> f64 {
    const HALF_LIFE_DAYS: f64 = 14.0;
    HALF_LIFE_DAYS
}
656
657/// Speculative tool execution mode.
658///
659/// Controls whether and how the agent pre-dispatches tool calls before the LLM
660/// finishes decoding the full tool-use block.
661#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
662#[serde(rename_all = "kebab-case")]
663pub enum SpeculationMode {
664    /// No speculation; uses existing synchronous path.
665    #[default]
666    Off,
667    /// LLM-decoding level: fires tools when streaming partial JSON has all required fields.
668    Decoding,
669    /// Application-level pattern (PASTE): predicts top-K calls from `SQLite` history.
670    Pattern,
671    /// Both decoding and pattern speculation active.
672    Both,
673}
674
675/// Pattern-based (PASTE) speculative execution config.
676///
677/// Controls the SQLite-backed tool sequence learning subsystem. Disabled by default for
678/// privacy and performance reasons; opt-in per deployment.
679#[derive(Debug, Clone, Deserialize, Serialize)]
680pub struct SpeculativePatternConfig {
681    /// Enable PASTE pattern learning and prediction. Default: false.
682    #[serde(default)]
683    pub enabled: bool,
684    /// Minimum observed occurrences before a prediction is issued.
685    #[serde(default = "default_min_observations")]
686    pub min_observations: u32,
687    /// Exponential decay half-life in days for pattern scoring.
688    #[serde(default = "default_half_life_days")]
689    pub half_life_days: f64,
690    /// LLM provider name (from `[[llm.providers]]`) for optional reranking.
691    /// Empty string disables LLM reranking; scoring-only path is used.
692    #[serde(default)]
693    pub rerank_provider: String,
694}
695
696impl Default for SpeculativePatternConfig {
697    fn default() -> Self {
698        Self {
699            enabled: false,
700            min_observations: default_min_observations(),
701            half_life_days: default_half_life_days(),
702            rerank_provider: String::new(),
703        }
704    }
705}
706
707/// Shell command regex allowlist for speculative execution.
708///
709/// Only commands matching at least one regex in this list are eligible for speculation.
710/// Default: empty (speculation disabled for shell by default).
711#[derive(Debug, Clone, Default, Deserialize, Serialize)]
712pub struct SpeculativeAllowlistConfig {
713    /// Regexes matched against the full `bash` command string. Empty = no shell speculation.
714    #[serde(default)]
715    pub shell: Vec<String>,
716}
717
718/// Top-level configuration for speculative tool execution.
719///
720/// All settings here are runtime-only: no cargo feature gates this section.
721/// The module always compiles; branches are never taken when `mode = "off"`.
722///
723/// # Examples
724///
725/// ```toml
726/// [tools.speculative]
727/// mode = "both"
728/// max_in_flight = 4
729/// ttl_seconds = 30
730///
731/// [tools.speculative.pattern]
732/// enabled = false
733/// ```
734#[derive(Debug, Clone, Deserialize, Serialize)]
735pub struct SpeculativeConfig {
736    /// Speculation mode. Default: `off`.
737    #[serde(default)]
738    pub mode: SpeculationMode,
739    /// Maximum concurrent in-flight speculative tasks. Bounded to `[1, 16]`.
740    #[serde(default = "default_max_in_flight")]
741    pub max_in_flight: usize,
742    /// Minimum confidence score `[0, 1]` to dispatch a speculative task.
743    #[serde(default = "default_confidence_threshold")]
744    pub confidence_threshold: f32,
745    /// Circuit-breaker: disable speculation for 60 s when wasted ms exceeds this per minute.
746    #[serde(default = "default_max_wasted_per_minute")]
747    pub max_wasted_per_minute: u64,
748    /// Per-handle wall-clock TTL in seconds before the handle is cancelled.
749    #[serde(default = "default_ttl_seconds")]
750    pub ttl_seconds: u64,
751    /// Emit `AuditEntry` for speculative dispatches (with `result: speculative_discarded`).
752    #[serde(default = "default_true")]
753    pub audit: bool,
754    /// PASTE pattern learning config.
755    #[serde(default)]
756    pub pattern: SpeculativePatternConfig,
757    /// Per-executor command allowlists.
758    #[serde(default)]
759    pub allowlist: SpeculativeAllowlistConfig,
760}
761
762impl Default for SpeculativeConfig {
763    fn default() -> Self {
764        Self {
765            mode: SpeculationMode::Off,
766            max_in_flight: default_max_in_flight(),
767            confidence_threshold: default_confidence_threshold(),
768            max_wasted_per_minute: default_max_wasted_per_minute(),
769            ttl_seconds: default_ttl_seconds(),
770            audit: true,
771            pattern: SpeculativePatternConfig::default(),
772            allowlist: SpeculativeAllowlistConfig::default(),
773        }
774    }
775}
776
777impl Default for ShellConfig {
778    fn default() -> Self {
779        Self {
780            timeout: default_timeout(),
781            blocked_commands: Vec::new(),
782            allowed_commands: Vec::new(),
783            allowed_paths: Vec::new(),
784            allow_network: true,
785            confirm_patterns: default_confirm_patterns(),
786            env_blocklist: Self::default_env_blocklist(),
787            transactional: false,
788            transaction_scope: Vec::new(),
789            auto_rollback: false,
790            auto_rollback_exit_codes: Vec::new(),
791            snapshot_required: false,
792            max_snapshot_bytes: 0,
793        }
794    }
795}
796
797impl Default for AuditConfig {
798    fn default() -> Self {
799        Self {
800            enabled: true,
801            destination: default_audit_destination(),
802            tool_risk_summary: false,
803        }
804    }
805}
806
807/// OAP-style declarative authorization. Rules are merged into `PolicyEnforcer` at startup.
808///
809/// Precedence: `policy.rules` are evaluated first (first-match-wins), then `authorization.rules`.
810/// Use `[tools.policy]` for deny-wins safety rules; use `[tools.authorization]` for
811/// capability-based allow/deny rules that layer on top.
812#[derive(Debug, Clone, Default, Deserialize, Serialize)]
813pub struct AuthorizationConfig {
814    /// Enable OAP authorization checks. When false, `rules` are ignored. Default: false.
815    #[serde(default)]
816    pub enabled: bool,
817    /// Per-tool authorization rules. Appended after `[tools.policy]` rules at startup.
818    #[serde(default)]
819    pub rules: Vec<PolicyRuleConfig>,
820}
821
822/// Configuration for egress network event logging.
823///
824/// Controls what outbound HTTP events are emitted to the audit JSONL stream and
825/// surfaced in the TUI Security panel. Domain allow/deny policy is NOT duplicated
826/// here — it remains solely in [`ScrapeConfig`].
827#[derive(Debug, Clone, Deserialize, Serialize)]
828#[serde(default)]
829#[allow(clippy::struct_excessive_bools)]
830pub struct EgressConfig {
831    /// Master switch for egress event emission. Default: `true`.
832    pub enabled: bool,
833    /// Emit [`EgressEvent`](crate::audit::EgressEvent)s for requests blocked by
834    /// SSRF/domain/scheme checks. Default: `true`.
835    pub log_blocked: bool,
836    /// Include `response_bytes` in the JSONL record. Default: `true`.
837    pub log_response_bytes: bool,
838    /// Show real hostname in `MetricsSnapshot::egress_recent` (TUI). When `false`,
839    /// `"***"` is stored instead. JSONL always keeps the real host. Default: `true`.
840    pub log_hosts_to_tui: bool,
841}
842
843impl Default for EgressConfig {
844    fn default() -> Self {
845        Self {
846            enabled: true,
847            log_blocked: true,
848            log_response_bytes: true,
849            log_hosts_to_tui: true,
850        }
851    }
852}
853
/// Default for `ScrapeConfig::timeout`.
fn default_scrape_timeout() -> u64 {
    const SCRAPE_TIMEOUT: u64 = 15;
    SCRAPE_TIMEOUT
}

/// Default for `ScrapeConfig::max_body_bytes`: 4 MiB.
fn default_max_body_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    4 * MIB
}
861
862/// Configuration for the web scrape tool.
863#[derive(Debug, Deserialize, Serialize)]
864pub struct ScrapeConfig {
865    #[serde(default = "default_scrape_timeout")]
866    pub timeout: u64,
867    #[serde(default = "default_max_body_bytes")]
868    pub max_body_bytes: usize,
869    /// Domain allowlist. Empty = all public domains allowed (default, existing behavior).
870    /// When non-empty, ONLY URLs whose host matches an entry are permitted (deny-unknown).
871    /// Supports exact match (`"docs.rs"`) and wildcard prefix (`"*.rust-lang.org"`).
872    /// Wildcard `*` matches a single subdomain segment only.
873    ///
874    /// Operators SHOULD set an explicit allowlist in production deployments.
875    /// Empty allowlist with a non-empty `denied_domains` is a denylist-only configuration
876    /// which is NOT a security boundary — an attacker can use any domain not on the list.
877    #[serde(default)]
878    pub allowed_domains: Vec<String>,
879    /// Domain denylist. Always enforced, regardless of allowlist state.
880    /// Supports the same pattern syntax as `allowed_domains`.
881    #[serde(default)]
882    pub denied_domains: Vec<String>,
883}
884
885impl Default for ScrapeConfig {
886    fn default() -> Self {
887        Self {
888            timeout: default_scrape_timeout(),
889            max_body_bytes: default_max_body_bytes(),
890            allowed_domains: Vec::new(),
891            denied_domains: Vec::new(),
892        }
893    }
894}
895
896fn default_sandbox_profile() -> crate::sandbox::SandboxProfile {
897    crate::sandbox::SandboxProfile::Workspace
898}
899
/// Serde default for `SandboxConfig::backend`: the `"auto"` backend hint.
fn default_sandbox_backend() -> String {
    String::from("auto")
}
903
904/// OS-level subprocess sandbox configuration (`[tools.sandbox]` TOML section).
905///
906/// When `enabled = true`, all shell commands are wrapped in an OS-native sandbox:
907/// - **macOS**: `sandbox-exec` (Seatbelt) with a generated `TinyScheme` profile.
908/// - **Linux** (requires `sandbox` cargo feature): `bwrap` + Landlock + seccomp BPF.
909///
910/// This sandbox applies **only to subprocess executors** (shell). In-process executors
911/// (`WebScrapeExecutor`, `FileExecutor`) are not covered — see `NFR-SB-1`.
912///
913/// # Examples
914///
915/// ```toml
916/// [tools.sandbox]
917/// enabled = true
918/// profile = "workspace"
919/// allow_read  = ["$HOME/.cache/zeph"]
920/// allow_write = ["./.local"]
921/// strict = true
922/// backend = "auto"
923/// ```
924#[derive(Debug, Clone, Deserialize, Serialize)]
925pub struct SandboxConfig {
926    /// Enable OS-level sandbox. Default: `false`.
927    ///
928    /// On Linux requires the `sandbox` cargo feature. When `true` but the feature is absent,
929    /// startup emits `WARN` and degrades to noop (fail-open). Use `strict = true` to
930    /// make the feature absence an error instead.
931    #[serde(default)]
932    pub enabled: bool,
933
934    /// Enforcement profile controlling the baseline restrictions.
935    #[serde(default = "default_sandbox_profile")]
936    pub profile: crate::sandbox::SandboxProfile,
937
938    /// Additional paths granted read access. Resolved to absolute paths at startup.
939    #[serde(default)]
940    pub allow_read: Vec<std::path::PathBuf>,
941
942    /// Additional paths granted write access. Resolved to absolute paths at startup.
943    #[serde(default)]
944    pub allow_write: Vec<std::path::PathBuf>,
945
946    /// When `true`, sandbox initialization failure aborts startup (fail-closed). Default: `true`.
947    #[serde(default = "default_true")]
948    pub strict: bool,
949
950    /// OS backend hint: `"auto"` / `"seatbelt"` / `"landlock-bwrap"` / `"noop"`.
951    ///
952    /// `"auto"` selects the best available backend for the current platform.
953    #[serde(default = "default_sandbox_backend")]
954    pub backend: String,
955}
956
957impl Default for SandboxConfig {
958    fn default() -> Self {
959        Self {
960            enabled: false,
961            profile: default_sandbox_profile(),
962            allow_read: Vec::new(),
963            allow_write: Vec::new(),
964            strict: true,
965            backend: default_sandbox_backend(),
966        }
967    }
968}
969
// Unit tests for configuration defaults and TOML deserialization.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn deserialize_default_config() {
        let toml_str = r#"
            enabled = true

            [shell]
            timeout = 60
            blocked_commands = ["rm -rf /", "sudo"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 60);
        assert_eq!(config.shell.blocked_commands.len(), 2);
        assert_eq!(config.shell.blocked_commands[0], "rm -rf /");
        assert_eq!(config.shell.blocked_commands[1], "sudo");
    }

    #[test]
    fn empty_blocked_commands() {
        let toml_str = r"
            [shell]
            timeout = 30
        ";

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
    }

    #[test]
    fn default_tools_config() {
        let config = ToolsConfig::default();
        assert!(config.enabled);
        assert!(config.summarize_output);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
        assert!(config.audit.enabled);
    }

    #[test]
    fn tools_summarize_output_default_true() {
        let config = ToolsConfig::default();
        assert!(config.summarize_output);
    }

    #[test]
    fn tools_summarize_output_parsing() {
        // Parse the NON-default value: the default is `true`, so parsing `true`
        // would pass even if the key were silently ignored.
        let toml_str = r"
            summarize_output = false
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(!config.summarize_output);
    }

    #[test]
    fn default_shell_config() {
        let config = ShellConfig::default();
        assert_eq!(config.timeout, 30);
        assert!(config.blocked_commands.is_empty());
        assert!(config.allowed_paths.is_empty());
        assert!(config.allow_network);
        assert!(!config.confirm_patterns.is_empty());
    }

    #[test]
    fn deserialize_omitted_fields_use_defaults() {
        let toml_str = "";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
        assert!(config.shell.allow_network);
        assert!(!config.shell.confirm_patterns.is_empty());
        assert_eq!(config.scrape.timeout, 15);
        assert_eq!(config.scrape.max_body_bytes, 4_194_304);
        assert!(config.audit.enabled);
        assert_eq!(config.audit.destination, "stdout");
        assert!(config.summarize_output);
    }

    #[test]
    fn default_scrape_config() {
        let config = ScrapeConfig::default();
        assert_eq!(config.timeout, 15);
        assert_eq!(config.max_body_bytes, 4_194_304);
        // Both domain lists start empty (no allowlist, no denylist).
        assert!(config.allowed_domains.is_empty());
        assert!(config.denied_domains.is_empty());
    }

    #[test]
    fn deserialize_scrape_config() {
        let toml_str = r"
            [scrape]
            timeout = 30
            max_body_bytes = 2097152
        ";

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.scrape.timeout, 30);
        assert_eq!(config.scrape.max_body_bytes, 2_097_152);
    }

    #[test]
    fn deserialize_scrape_domain_lists() {
        let toml_str = r#"
            [scrape]
            allowed_domains = ["docs.rs", "*.rust-lang.org"]
            denied_domains = ["evil.example"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(
            config.scrape.allowed_domains,
            vec!["docs.rs", "*.rust-lang.org"]
        );
        assert_eq!(config.scrape.denied_domains, vec!["evil.example"]);
        // Fields omitted from the section keep their serde defaults.
        assert_eq!(config.scrape.timeout, 15);
        assert_eq!(config.scrape.max_body_bytes, 4_194_304);
    }

    #[test]
    fn tools_config_default_includes_scrape() {
        let config = ToolsConfig::default();
        assert_eq!(config.scrape.timeout, 15);
        assert_eq!(config.scrape.max_body_bytes, 4_194_304);
    }

    #[test]
    fn default_egress_config_enables_everything() {
        let config = EgressConfig::default();
        assert!(config.enabled);
        assert!(config.log_blocked);
        assert!(config.log_response_bytes);
        assert!(config.log_hosts_to_tui);
    }

    #[test]
    fn deserialize_egress_config_partial_uses_defaults() {
        // Struct-level `#[serde(default)]` fills omitted fields from `Default`.
        let config: EgressConfig = toml::from_str("log_blocked = false").unwrap();
        assert!(config.enabled);
        assert!(!config.log_blocked);
        assert!(config.log_response_bytes);
        assert!(config.log_hosts_to_tui);
    }

    #[test]
    fn default_sandbox_config() {
        let config = SandboxConfig::default();
        assert!(!config.enabled);
        assert!(matches!(
            config.profile,
            crate::sandbox::SandboxProfile::Workspace
        ));
        assert!(config.allow_read.is_empty());
        assert!(config.allow_write.is_empty());
        assert!(config.strict);
        assert_eq!(config.backend, "auto");
    }

    #[test]
    fn deserialize_sandbox_config_omitted_uses_defaults() {
        let config: SandboxConfig = toml::from_str("").unwrap();
        assert!(!config.enabled);
        assert!(matches!(
            config.profile,
            crate::sandbox::SandboxProfile::Workspace
        ));
        assert!(config.allow_read.is_empty());
        assert!(config.allow_write.is_empty());
        assert!(config.strict);
        assert_eq!(config.backend, "auto");
    }

    #[test]
    fn deserialize_allowed_commands() {
        let toml_str = r#"
            [shell]
            timeout = 30
            allowed_commands = ["curl", "wget"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.shell.allowed_commands, vec!["curl", "wget"]);
    }

    #[test]
    fn default_allowed_commands_empty() {
        let config = ShellConfig::default();
        assert!(config.allowed_commands.is_empty());
    }

    #[test]
    fn deserialize_shell_security_fields() {
        let toml_str = r#"
            [shell]
            allowed_paths = ["/tmp", "/home/user"]
            allow_network = false
            confirm_patterns = ["rm ", "drop table"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.shell.allowed_paths, vec!["/tmp", "/home/user"]);
        assert!(!config.shell.allow_network);
        assert_eq!(config.shell.confirm_patterns, vec!["rm ", "drop table"]);
    }

    #[test]
    fn deserialize_audit_config() {
        let toml_str = r#"
            [audit]
            enabled = true
            destination = "/var/log/zeph-audit.log"
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.audit.enabled);
        assert_eq!(config.audit.destination, "/var/log/zeph-audit.log");
    }

    #[test]
    fn default_audit_config() {
        let config = AuditConfig::default();
        assert!(config.enabled);
        assert_eq!(config.destination, "stdout");
    }

    #[test]
    fn permission_policy_from_legacy_fields() {
        let config = ToolsConfig {
            shell: ShellConfig {
                blocked_commands: vec!["sudo".to_owned()],
                confirm_patterns: vec!["rm ".to_owned()],
                ..ShellConfig::default()
            },
            ..ToolsConfig::default()
        };
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        assert_eq!(
            policy.check("bash", "sudo apt"),
            crate::permissions::PermissionAction::Deny
        );
        assert_eq!(
            policy.check("bash", "rm file"),
            crate::permissions::PermissionAction::Ask
        );
    }

    #[test]
    fn permission_policy_from_explicit_config() {
        let toml_str = r#"
            [permissions]
            [[permissions.bash]]
            pattern = "*sudo*"
            action = "deny"
        "#;
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        assert_eq!(
            policy.check("bash", "sudo rm"),
            crate::permissions::PermissionAction::Deny
        );
    }

    #[test]
    fn permission_policy_default_uses_legacy() {
        let config = ToolsConfig::default();
        assert!(config.permissions.is_none());
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        // Default ShellConfig has confirm_patterns, so legacy rules are generated
        assert!(!config.shell.confirm_patterns.is_empty());
        assert!(policy.rules().contains_key("bash"));
    }

    #[test]
    fn deserialize_overflow_config_full() {
        let toml_str = r"
            [overflow]
            threshold = 100000
            retention_days = 14
            max_overflow_bytes = 5242880
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 100_000);
        assert_eq!(config.overflow.retention_days, 14);
        assert_eq!(config.overflow.max_overflow_bytes, 5_242_880);
    }

    #[test]
    fn deserialize_overflow_config_unknown_dir_field_is_ignored() {
        // Old configs with `dir = "..."` must not fail deserialization.
        let toml_str = r#"
            [overflow]
            threshold = 75000
            dir = "/tmp/overflow"
        "#;
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 75_000);
    }

    #[test]
    fn deserialize_overflow_config_partial_uses_defaults() {
        let toml_str = r"
            [overflow]
            threshold = 75000
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 75_000);
        assert_eq!(config.overflow.retention_days, 7);
        // Every omitted field must fall back, not just `retention_days`.
        assert_eq!(config.overflow.max_overflow_bytes, 10 * 1024 * 1024);
    }

    #[test]
    fn deserialize_overflow_config_omitted_uses_defaults() {
        let config: ToolsConfig = toml::from_str("").unwrap();
        assert_eq!(config.overflow.threshold, 50_000);
        assert_eq!(config.overflow.retention_days, 7);
        assert_eq!(config.overflow.max_overflow_bytes, 10 * 1024 * 1024);
    }

    #[test]
    fn result_cache_config_defaults() {
        let config = ResultCacheConfig::default();
        assert!(config.enabled);
        assert_eq!(config.ttl_secs, 300);
    }

    #[test]
    fn deserialize_result_cache_config() {
        let toml_str = r"
            [result_cache]
            enabled = false
            ttl_secs = 60
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(!config.result_cache.enabled);
        assert_eq!(config.result_cache.ttl_secs, 60);
    }

    #[test]
    fn result_cache_omitted_uses_defaults() {
        let config: ToolsConfig = toml::from_str("").unwrap();
        assert!(config.result_cache.enabled);
        assert_eq!(config.result_cache.ttl_secs, 300);
    }

    #[test]
    fn result_cache_ttl_zero_is_valid() {
        let toml_str = r"
            [result_cache]
            ttl_secs = 0
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.result_cache.ttl_secs, 0);
    }

    #[test]
    fn adversarial_policy_default_exempt_tools_contains_skill_ops() {
        let exempt = AdversarialPolicyConfig::default_exempt_tools();
        assert!(
            exempt.contains(&"load_skill".to_string()),
            "default exempt_tools must contain load_skill"
        );
        assert!(
            exempt.contains(&"invoke_skill".to_string()),
            "default exempt_tools must contain invoke_skill"
        );
    }

    #[test]
    fn utility_scoring_default_exempt_tools_contains_skill_ops() {
        let cfg = UtilityScoringConfig::default();
        assert!(
            cfg.exempt_tools.contains(&"invoke_skill".to_string()),
            "UtilityScoringConfig default exempt_tools must contain invoke_skill"
        );
        assert!(
            cfg.exempt_tools.contains(&"load_skill".to_string()),
            "UtilityScoringConfig default exempt_tools must contain load_skill"
        );
    }

    #[test]
    fn utility_partial_toml_exempt_tools_uses_default_not_empty_vec() {
        // Regression: #[serde(default)] on exempt_tools called Vec::default() (empty)
        // instead of the struct-level Default which sets ["invoke_skill", "load_skill"].
        let toml_str = r"
            [utility]
            enabled = true
            threshold = 0.1
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(
            config
                .utility
                .exempt_tools
                .contains(&"invoke_skill".to_string()),
            "partial [tools.utility] TOML must populate exempt_tools with invoke_skill"
        );
        assert!(
            config
                .utility
                .exempt_tools
                .contains(&"load_skill".to_string()),
            "partial [tools.utility] TOML must populate exempt_tools with load_skill"
        );
    }
}
zeph_tools/config.rs

zeph_tools/
config.rs