zeph_tools/
config.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6use crate::permissions::{AutonomyLevel, PermissionPolicy, PermissionsConfig};
7use crate::policy::{PolicyConfig, PolicyRuleConfig};
8
/// Serde default helper returning `true`, for boolean options that are on unless
/// the config says otherwise (wired in via `#[serde(default = "default_true")]`).
fn default_true() -> bool {
    true
}
/// Default timeout, in milliseconds, for a single adversarial-policy LLM call.
fn default_adversarial_timeout_ms() -> u64 {
    3_000_u64
}
15
/// Default for `ShellConfig::timeout`.
///
/// NOTE(review): the unit is presumably seconds — confirm against the shell executor.
fn default_timeout() -> u64 {
    30_u64
}
19
/// Default time-to-live, in seconds, for tool result cache entries.
fn default_cache_ttl_secs() -> u64 {
    300_u64
}
23
/// Default value for `ShellConfig::confirm_patterns`.
///
/// The list covers destructive commands (`rm`, forced pushes, SQL drops/truncates)
/// and shell constructs that embed further commands (command/process substitution,
/// here-strings, `eval`).
fn default_confirm_patterns() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        "rm ",
        "git push -f",
        "git push --force",
        "drop table",
        "drop database",
        "truncate ",
        "$(",
        "`",
        "<(",
        ">(",
        "<<<",
        "eval ",
    ];
    PATTERNS.iter().map(|pattern| String::from(*pattern)).collect()
}
40
/// Default audit log destination: the literal string `"stdout"`.
fn default_audit_destination() -> String {
    String::from("stdout")
}
44
/// Default for `OverflowConfig::threshold`.
fn default_overflow_threshold() -> usize {
    50_000_usize
}
48
/// Default for `OverflowConfig::retention_days` (one week).
fn default_retention_days() -> u64 {
    7_u64
}
52
/// Default per-entry size cap for overflow storage: 10 MiB.
fn default_max_overflow_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    10 * MIB
}
56
/// Configuration for large tool response offload to `SQLite`.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OverflowConfig {
    /// Size threshold for offloading a response. Default: `50_000`.
    ///
    /// NOTE(review): the unit (bytes vs. characters) is not visible in this file — confirm.
    #[serde(default = "default_overflow_threshold")]
    pub threshold: usize,
    /// Retention period in days. Default: `7`.
    ///
    /// NOTE(review): presumably applies to stored overflow entries — confirm with the store.
    #[serde(default = "default_retention_days")]
    pub retention_days: u64,
    /// Maximum bytes per overflow entry. `0` means unlimited. Default: 10 MiB.
    #[serde(default = "default_max_overflow_bytes")]
    pub max_overflow_bytes: usize,
}
68
69impl Default for OverflowConfig {
70    fn default() -> Self {
71        Self {
72            threshold: default_overflow_threshold(),
73            retention_days: default_retention_days(),
74            max_overflow_bytes: default_max_overflow_bytes(),
75        }
76    }
77}
78
/// Default for `AnomalyConfig::window_size`.
fn default_anomaly_window() -> usize {
    10_usize
}
82
/// Default for `AnomalyConfig::error_threshold`.
fn default_anomaly_error_threshold() -> f64 {
    0.5_f64
}
86
/// Default for `AnomalyConfig::critical_threshold`.
fn default_anomaly_critical_threshold() -> f64 {
    0.8_f64
}
90
/// Configuration for the sliding-window anomaly detector.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AnomalyConfig {
    /// Whether the anomaly detector is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Size of the sliding window. Default: `10`.
    ///
    /// NOTE(review): presumably counted in tool calls — confirm against the detector.
    #[serde(default = "default_anomaly_window")]
    pub window_size: usize,
    /// Error threshold. Default: `0.5`.
    ///
    /// NOTE(review): presumably the failure ratio within the window that raises an
    /// error-level anomaly — confirm against the detector.
    #[serde(default = "default_anomaly_error_threshold")]
    pub error_threshold: f64,
    /// Critical threshold. Default: `0.8`.
    ///
    /// NOTE(review): presumably the failure ratio that raises a critical-level
    /// anomaly; no ordering against `error_threshold` is enforced in this type.
    #[serde(default = "default_anomaly_critical_threshold")]
    pub critical_threshold: f64,
    /// Emit a WARN log when a reasoning-enhanced model (o1, o3, `QwQ`, etc.) produces
    /// a quality failure (`ToolNotFound`, `InvalidParameters`, `TypeMismatch`). Default: `true`.
    ///
    /// Based on arXiv:2510.22977 — CoT/RL reasoning amplifies tool hallucination.
    #[serde(default = "default_true")]
    pub reasoning_model_warning: bool,
}
109
110impl Default for AnomalyConfig {
111    fn default() -> Self {
112        Self {
113            enabled: true,
114            window_size: default_anomaly_window(),
115            error_threshold: default_anomaly_error_threshold(),
116            critical_threshold: default_anomaly_critical_threshold(),
117            reasoning_model_warning: true,
118        }
119    }
120}
121
/// Configuration for the tool result cache.
///
/// Both fields carry serde defaults, so an empty table yields an enabled cache
/// with a 300-second TTL.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ResultCacheConfig {
    /// Whether caching is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Time-to-live in seconds. `0` means entries never expire. Default: `300`.
    #[serde(default = "default_cache_ttl_secs")]
    pub ttl_secs: u64,
}
132
133impl Default for ResultCacheConfig {
134    fn default() -> Self {
135        Self {
136            enabled: true,
137            ttl_secs: default_cache_ttl_secs(),
138        }
139    }
140}
141
/// Default TAFC complexity threshold (tau).
fn default_tafc_complexity_threshold() -> f64 {
    0.6_f64
}
145
/// Configuration for Think-Augmented Function Calling (TAFC).
///
/// Deserialization does not range-check `complexity_threshold`; call
/// `TafcConfig::validated` to clamp it or reset non-finite values.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TafcConfig {
    /// Enable TAFC schema augmentation (default: false).
    #[serde(default)]
    pub enabled: bool,
    /// Complexity threshold tau in [0.0, 1.0]; tools with complexity >= tau are augmented.
    /// Default: 0.6
    #[serde(default = "default_tafc_complexity_threshold")]
    pub complexity_threshold: f64,
}
157
158impl Default for TafcConfig {
159    fn default() -> Self {
160        Self {
161            enabled: false,
162            complexity_threshold: default_tafc_complexity_threshold(),
163        }
164    }
165}
166
167impl TafcConfig {
168    /// Validate and clamp `complexity_threshold` to \[0.0, 1.0\]. Reset NaN/Infinity to 0.6.
169    #[must_use]
170    pub fn validated(mut self) -> Self {
171        if self.complexity_threshold.is_finite() {
172            self.complexity_threshold = self.complexity_threshold.clamp(0.0, 1.0);
173        } else {
174            self.complexity_threshold = 0.6;
175        }
176        self
177    }
178}
179
/// Default minimum utility score required to execute a tool call.
fn default_utility_threshold() -> f32 {
    0.1_f32
}
183
/// Default weight of the estimated-gain term in the utility score.
fn default_utility_gain_weight() -> f32 {
    1.0_f32
}
187
/// Default weight of the step-cost term in the utility score.
fn default_utility_cost_weight() -> f32 {
    0.5_f32
}
191
/// Default weight of the redundancy penalty in the utility score.
fn default_utility_redundancy_weight() -> f32 {
    0.3_f32
}
195
/// Default weight of the exploration (uncertainty) bonus in the utility score.
fn default_utility_uncertainty_bonus() -> f32 {
    0.2_f32
}
199
200/// Configuration for utility-guided tool dispatch (`[tools.utility]` TOML section).
201///
202/// Implements the utility gate from arXiv:2603.19896: each tool call is scored
203/// `U = gain_weight*gain - cost_weight*cost - redundancy_weight*redundancy + uncertainty_bonus*uncertainty`.
204/// Calls with `U < threshold` are skipped (fail-closed on scoring errors).
205#[derive(Debug, Clone, Deserialize, Serialize)]
206#[serde(default)]
207pub struct UtilityScoringConfig {
208    /// Enable utility-guided gating. Default: false (opt-in).
209    pub enabled: bool,
210    /// Minimum utility score required to execute a tool call. Default: 0.1.
211    #[serde(default = "default_utility_threshold")]
212    pub threshold: f32,
213    /// Weight for the estimated gain component. Must be >= 0. Default: 1.0.
214    #[serde(default = "default_utility_gain_weight")]
215    pub gain_weight: f32,
216    /// Weight for the step cost component. Must be >= 0. Default: 0.5.
217    #[serde(default = "default_utility_cost_weight")]
218    pub cost_weight: f32,
219    /// Weight for the redundancy penalty. Must be >= 0. Default: 0.3.
220    #[serde(default = "default_utility_redundancy_weight")]
221    pub redundancy_weight: f32,
222    /// Weight for the exploration bonus. Must be >= 0. Default: 0.2.
223    #[serde(default = "default_utility_uncertainty_bonus")]
224    pub uncertainty_bonus: f32,
225    /// Tool names that bypass the utility gate unconditionally (case-insensitive).
226    /// Auto-populated with file-read tools when `MagicDocs` is enabled. User-specified
227    /// entries are preserved and merged additively with any auto-populated names.
228    #[serde(default)]
229    pub exempt_tools: Vec<String>,
230}
231
232impl Default for UtilityScoringConfig {
233    fn default() -> Self {
234        Self {
235            enabled: false,
236            threshold: default_utility_threshold(),
237            gain_weight: default_utility_gain_weight(),
238            cost_weight: default_utility_cost_weight(),
239            redundancy_weight: default_utility_redundancy_weight(),
240            uncertainty_bonus: default_utility_uncertainty_bonus(),
241            exempt_tools: vec!["invoke_skill".to_string(), "load_skill".to_string()],
242        }
243    }
244}
245
246impl UtilityScoringConfig {
247    /// Validate that all weights and threshold are non-negative and finite.
248    ///
249    /// # Errors
250    ///
251    /// Returns a description of the first invalid field found.
252    pub fn validate(&self) -> Result<(), String> {
253        let fields = [
254            ("threshold", self.threshold),
255            ("gain_weight", self.gain_weight),
256            ("cost_weight", self.cost_weight),
257            ("redundancy_weight", self.redundancy_weight),
258            ("uncertainty_bonus", self.uncertainty_bonus),
259        ];
260        for (name, val) in fields {
261            if !val.is_finite() {
262                return Err(format!("[tools.utility] {name} must be finite, got {val}"));
263            }
264            if val < 0.0 {
265                return Err(format!("[tools.utility] {name} must be >= 0, got {val}"));
266            }
267        }
268        Ok(())
269    }
270}
271
/// Default similarity boost per satisfied `prefers` dependency.
fn default_boost_per_dep() -> f32 {
    0.15_f32
}
275
/// Default cap on the total boost from `prefers` dependencies.
fn default_max_total_boost() -> f32 {
    0.2_f32
}
279
/// Dependency specification for a single tool.
///
/// Both lists default to empty and are omitted from serialized output when empty
/// (`skip_serializing_if = "Vec::is_empty"`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ToolDependency {
    /// Hard prerequisites: tool is hidden until ALL of these have completed successfully.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub requires: Vec<String>,
    /// Soft prerequisites: tool gets a similarity boost when these have completed.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefers: Vec<String>,
}
290
/// Configuration for the tool dependency graph feature.
///
/// Every field carries a serde default, so an empty table yields a disabled
/// feature with no rules.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DependencyConfig {
    /// Whether dependency gating is enabled. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Similarity boost added per satisfied `prefers` dependency. Default: 0.15.
    #[serde(default = "default_boost_per_dep")]
    pub boost_per_dep: f32,
    /// Maximum total boost applied regardless of how many `prefers` deps are met. Default: 0.2.
    ///
    /// With the defaults, a second satisfied `prefers` dependency already hits
    /// this cap (2 x 0.15 > 0.2).
    #[serde(default = "default_max_total_boost")]
    pub max_total_boost: f32,
    /// Per-tool dependency rules. Key is `tool_id`. Default: empty map.
    #[serde(default)]
    pub rules: std::collections::HashMap<String, ToolDependency>,
}
307
308impl Default for DependencyConfig {
309    fn default() -> Self {
310        Self {
311            enabled: false,
312            boost_per_dep: default_boost_per_dep(),
313            max_total_boost: default_max_total_boost(),
314            rules: std::collections::HashMap::new(),
315        }
316    }
317}
318
/// Default number of retry attempts for transient tool errors.
fn default_retry_max_attempts() -> usize {
    2_usize
}
322
/// Default base delay, in milliseconds, for exponential backoff.
fn default_retry_base_ms() -> u64 {
    500_u64
}
326
/// Default cap, in milliseconds, on a single backoff delay.
fn default_retry_max_ms() -> u64 {
    5_000_u64
}
330
/// Default wall-clock budget, in seconds, for all retries of one tool call.
fn default_retry_budget_secs() -> u64 {
    30_u64
}
334
/// Configuration for tool error retry behavior.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RetryConfig {
    /// Maximum retry attempts for transient errors per tool call. 0 = disabled.
    /// Default: `2`.
    #[serde(default = "default_retry_max_attempts")]
    pub max_attempts: usize,
    /// Base delay (ms) for exponential backoff. Default: `500`.
    #[serde(default = "default_retry_base_ms")]
    pub base_ms: u64,
    /// Maximum delay cap (ms) for exponential backoff. Default: `5000`.
    #[serde(default = "default_retry_max_ms")]
    pub max_ms: u64,
    /// Maximum wall-clock time (seconds) for all retries of a single tool call. 0 = unlimited.
    /// Default: `30`.
    #[serde(default = "default_retry_budget_secs")]
    pub budget_secs: u64,
    /// Provider name from `[[llm.providers]]` for LLM-based parameter reformatting on
    /// `InvalidParameters`/`TypeMismatch` errors. Empty string = disabled.
    /// Default: empty (disabled).
    #[serde(default)]
    pub parameter_reformat_provider: String,
}
355
356impl Default for RetryConfig {
357    fn default() -> Self {
358        Self {
359            max_attempts: default_retry_max_attempts(),
360            base_ms: default_retry_base_ms(),
361            max_ms: default_retry_max_ms(),
362            budget_secs: default_retry_budget_secs(),
363            parameter_reformat_provider: String::new(),
364        }
365    }
366}
367
/// Configuration for the LLM-based adversarial policy agent.
///
/// Every key is optional when deserializing; omitted keys take the defaults
/// documented on each field.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AdversarialPolicyConfig {
    /// Enable the adversarial policy agent. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Provider name from `[[llm.providers]]` for the policy validation LLM.
    /// Should reference a fast, cheap model (e.g. `gpt-4o-mini`).
    /// Empty string = fall back to the default provider.
    #[serde(default)]
    pub policy_provider: String,
    /// Path to a plain-text policy file. Each non-empty, non-comment line is one policy.
    ///
    /// Needs no `#[serde(default)]`: serde's derive treats a missing `Option`
    /// field as `None`.
    pub policy_file: Option<String>,
    /// Whether to allow tool calls when the policy LLM fails (timeout/error).
    /// Default: `false` (fail-closed / deny on error).
    ///
    /// Setting this to `true` trades security for availability. Use only in
    /// deployments where the declarative `PolicyEnforcer` already covers hard rules.
    #[serde(default)]
    pub fail_open: bool,
    /// Timeout in milliseconds for a single policy LLM call. Default: 3000.
    #[serde(default = "default_adversarial_timeout_ms")]
    pub timeout_ms: u64,
    /// Tool names that are always allowed through the adversarial policy gate,
    /// regardless of policy content. Covers internal agent operations that are
    /// not externally visible side effects.
    ///
    /// Default: the list from `AdversarialPolicyConfig::default_exempt_tools`.
    /// A user-supplied list replaces that default rather than extending it.
    #[serde(default = "AdversarialPolicyConfig::default_exempt_tools")]
    pub exempt_tools: Vec<String>,
}
397impl Default for AdversarialPolicyConfig {
398    fn default() -> Self {
399        Self {
400            enabled: false,
401            policy_provider: String::new(),
402            policy_file: None,
403            fail_open: false,
404            timeout_ms: default_adversarial_timeout_ms(),
405            exempt_tools: Self::default_exempt_tools(),
406        }
407    }
408}
409impl AdversarialPolicyConfig {
410    fn default_exempt_tools() -> Vec<String> {
411        vec![
412            "memory_save".into(),
413            "memory_search".into(),
414            "read_overflow".into(),
415            "load_skill".into(),
416            "invoke_skill".into(),
417            "schedule_deferred".into(),
418        ]
419    }
420}
421
/// Per-path read allow/deny sandbox for the file tool.
///
/// Evaluation order: deny-then-allow. If a path matches `deny_read` and does NOT
/// match `allow_read`, access is denied. Empty `deny_read` means no read restrictions.
///
/// All patterns are matched against the canonicalized (absolute, symlink-resolved) path.
///
/// Both lists default to empty (derived `Default` and `#[serde(default)]`).
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct FileConfig {
    /// Glob patterns for paths denied for reading. Evaluated first.
    #[serde(default)]
    pub deny_read: Vec<String>,
    /// Glob patterns for paths allowed for reading. Evaluated second (overrides deny).
    #[serde(default)]
    pub allow_read: Vec<String>,
}
437
/// Top-level configuration for tool execution.
///
/// Every field carries a serde default, so any section may be omitted. Each
/// nested field deserializes from the TOML sub-table of the same name
/// (e.g. `utility` from `[tools.utility]`).
#[derive(Debug, Deserialize, Serialize)]
pub struct ToolsConfig {
    /// Whether tool execution is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Whether tool output is summarized. Default: `true`.
    ///
    /// NOTE(review): the summarization mechanism is not visible in this file.
    #[serde(default = "default_true")]
    pub summarize_output: bool,
    /// Shell tool settings (timeout, block/allow lists, transactions).
    #[serde(default)]
    pub shell: ShellConfig,
    /// Web scrape tool settings (timeout, body size cap, domain lists).
    #[serde(default)]
    pub scrape: ScrapeConfig,
    /// Audit logging settings.
    #[serde(default)]
    pub audit: AuditConfig,
    /// Explicit permission configuration. When `None` (the default),
    /// `permission_policy` derives the policy from the legacy
    /// `shell.blocked_commands` / `shell.confirm_patterns` fields instead.
    #[serde(default)]
    pub permissions: Option<PermissionsConfig>,
    /// Output filter settings (type declared in `crate::filter`).
    #[serde(default)]
    pub filters: crate::filter::FilterConfig,
    /// Large tool response offload settings.
    #[serde(default)]
    pub overflow: OverflowConfig,
    /// Sliding-window anomaly detector settings.
    #[serde(default)]
    pub anomaly: AnomalyConfig,
    /// Tool result cache settings.
    #[serde(default)]
    pub result_cache: ResultCacheConfig,
    /// Think-Augmented Function Calling (TAFC) settings.
    #[serde(default)]
    pub tafc: TafcConfig,
    /// Tool dependency graph settings.
    #[serde(default)]
    pub dependencies: DependencyConfig,
    /// Tool error retry settings.
    #[serde(default)]
    pub retry: RetryConfig,
    /// Declarative policy compiler for tool call authorization.
    #[serde(default)]
    pub policy: PolicyConfig,
    /// LLM-based adversarial policy agent for natural-language policy enforcement.
    #[serde(default)]
    pub adversarial_policy: AdversarialPolicyConfig,
    /// Utility-guided tool dispatch gate.
    #[serde(default)]
    pub utility: UtilityScoringConfig,
    /// Per-path read allow/deny sandbox for the file tool.
    #[serde(default)]
    pub file: FileConfig,
    /// OAP declarative pre-action authorization. Rules are merged into `PolicyEnforcer` at
    /// startup. Authorization rules are appended after `policy.rules` — policy rules take
    /// precedence (first-match-wins semantics). This means existing policy allow/deny rules
    /// are evaluated before authorization rules.
    #[serde(default)]
    pub authorization: AuthorizationConfig,
    /// Maximum tool calls allowed per agent session. `None` = unlimited (default).
    /// Counted on the first attempt only — retries do not consume additional quota slots.
    #[serde(default)]
    pub max_tool_calls_per_session: Option<u32>,
    /// Speculative tool execution configuration.
    ///
    /// Runtime-only; no cargo feature gate. Default mode is `off`.
    #[serde(default)]
    pub speculative: SpeculativeConfig,
    /// OS-level subprocess sandbox configuration (`[tools.sandbox]` TOML section).
    ///
    /// When `enabled = true`, all shell commands are wrapped in an OS-native sandbox
    /// (macOS Seatbelt or Linux bwrap + Landlock). Default: disabled.
    #[serde(default)]
    pub sandbox: SandboxConfig,
    /// Egress network event logging configuration.
    #[serde(default)]
    pub egress: EgressConfig,
}
504
505impl ToolsConfig {
506    /// Build a `PermissionPolicy` from explicit config or legacy shell fields.
507    #[must_use]
508    pub fn permission_policy(&self, autonomy_level: AutonomyLevel) -> PermissionPolicy {
509        let policy = if let Some(ref perms) = self.permissions {
510            PermissionPolicy::from(perms.clone())
511        } else {
512            PermissionPolicy::from_legacy(
513                &self.shell.blocked_commands,
514                &self.shell.confirm_patterns,
515            )
516        };
517        policy.with_autonomy(autonomy_level)
518    }
519}
520
/// Shell-specific configuration: timeout, command blocklist, and allowlist overrides.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct ShellConfig {
    /// Command timeout. Default: `30`.
    ///
    /// NOTE(review): the unit is presumably seconds — confirm against the executor.
    #[serde(default = "default_timeout")]
    pub timeout: u64,
    /// Blocked commands. Also feeds the legacy permission policy built by
    /// `ToolsConfig::permission_policy` when no explicit `permissions` are set.
    /// Default: empty.
    #[serde(default)]
    pub blocked_commands: Vec<String>,
    /// Allowlist overrides for commands. Default: empty.
    ///
    /// NOTE(review): how these interact with `blocked_commands` is decided by the
    /// executor, which is outside this file.
    #[serde(default)]
    pub allowed_commands: Vec<String>,
    /// Allowed paths. Default: empty.
    ///
    /// NOTE(review): enforcement semantics (and the meaning of an empty list) are
    /// not visible in this file — confirm against the executor.
    #[serde(default)]
    pub allowed_paths: Vec<String>,
    /// Whether network access is allowed. Default: `true`.
    #[serde(default = "default_true")]
    pub allow_network: bool,
    /// Command patterns that require confirmation; passed to the legacy permission
    /// policy alongside `blocked_commands`. Default: the list returned by
    /// `default_confirm_patterns` (destructive commands and shell substitutions).
    #[serde(default = "default_confirm_patterns")]
    pub confirm_patterns: Vec<String>,
    /// Environment variable name prefixes to strip from subprocess environment.
    /// Variables whose names start with any of these prefixes are removed before
    /// spawning shell commands. Default covers common credential naming conventions.
    #[serde(default = "ShellConfig::default_env_blocklist")]
    pub env_blocklist: Vec<String>,
    /// Enable transactional mode: snapshot files before write commands, rollback on failure.
    #[serde(default)]
    pub transactional: bool,
    /// Glob patterns defining which paths are eligible for snapshotting.
    /// Only files matching these patterns (relative to cwd) are captured.
    /// Empty = snapshot all files referenced in the command.
    #[serde(default)]
    pub transaction_scope: Vec<String>,
    /// Automatically rollback when exit code >= 2. Default: false.
    /// Exit code 1 is excluded because many tools (grep, diff, test) use it for
    /// non-error conditions.
    #[serde(default)]
    pub auto_rollback: bool,
    /// Exit codes that trigger auto-rollback. Default: empty (uses >= 2 heuristic).
    /// When non-empty, only these exact exit codes trigger rollback.
    #[serde(default)]
    pub auto_rollback_exit_codes: Vec<i32>,
    /// When true, snapshot failure aborts execution with an error.
    /// When false (default), snapshot failure emits a warning and execution proceeds.
    #[serde(default)]
    pub snapshot_required: bool,
    /// Maximum cumulative bytes for transaction snapshots. 0 = unlimited.
    /// Default: `0`.
    #[serde(default)]
    pub max_snapshot_bytes: u64,
}
567
568impl ShellConfig {
569    #[must_use]
570    pub fn default_env_blocklist() -> Vec<String> {
571        vec![
572            "ZEPH_".into(),
573            "AWS_".into(),
574            "AZURE_".into(),
575            "GCP_".into(),
576            "GOOGLE_".into(),
577            "OPENAI_".into(),
578            "ANTHROPIC_".into(),
579            "HF_".into(),
580            "HUGGING".into(),
581        ]
582    }
583}
584
/// Configuration for audit logging of tool executions.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Deserialize, Serialize)]
pub struct AuditConfig {
    /// Whether audit logging is enabled. Default: `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Where audit records are written. Default: `"stdout"`.
    ///
    /// NOTE(review): other accepted values (e.g. a file path) are interpreted by
    /// the audit logger, which is outside this file.
    #[serde(default = "default_audit_destination")]
    pub destination: String,
    /// When true, log a per-tool risk summary at startup.
    /// Each entry includes: tool name, privilege level, and expected input sanitization.
    /// This is a design-time risk inventory, NOT runtime static analysis or a guarantee
    /// that sanitization is functioning correctly.
    /// Default: `false`.
    #[serde(default)]
    pub tool_risk_summary: bool,
}
599
600impl Default for ToolsConfig {
601    fn default() -> Self {
602        Self {
603            enabled: true,
604            summarize_output: true,
605            shell: ShellConfig::default(),
606            scrape: ScrapeConfig::default(),
607            audit: AuditConfig::default(),
608            permissions: None,
609            filters: crate::filter::FilterConfig::default(),
610            overflow: OverflowConfig::default(),
611            anomaly: AnomalyConfig::default(),
612            result_cache: ResultCacheConfig::default(),
613            tafc: TafcConfig::default(),
614            dependencies: DependencyConfig::default(),
615            retry: RetryConfig::default(),
616            policy: PolicyConfig::default(),
617            adversarial_policy: AdversarialPolicyConfig::default(),
618            utility: UtilityScoringConfig::default(),
619            file: FileConfig::default(),
620            authorization: AuthorizationConfig::default(),
621            max_tool_calls_per_session: None,
622            speculative: SpeculativeConfig::default(),
623            sandbox: SandboxConfig::default(),
624            egress: EgressConfig::default(),
625        }
626    }
627}
628
/// Default maximum number of concurrent in-flight speculative tasks.
fn default_max_in_flight() -> usize {
    4_usize
}
632
/// Default minimum confidence score for dispatching a speculative task.
fn default_confidence_threshold() -> f32 {
    0.55_f32
}
636
/// Default for `SpeculativeConfig::max_wasted_per_minute` (circuit-breaker limit).
fn default_max_wasted_per_minute() -> u64 {
    100_u64
}
640
/// Default wall-clock TTL, in seconds, for a speculative handle.
fn default_ttl_seconds() -> u64 {
    30_u64
}
644
/// Default minimum number of observed occurrences before a pattern prediction is issued.
fn default_min_observations() -> u32 {
    5_u32
}
648
/// Default exponential-decay half-life, in days, for pattern scoring (two weeks).
fn default_half_life_days() -> f64 {
    14.0_f64
}
652
/// Speculative tool execution mode.
///
/// Controls whether and how the agent pre-dispatches tool calls before the LLM
/// finishes decoding the full tool-use block.
///
/// Serialized in kebab-case, so the accepted config values are `"off"`,
/// `"decoding"`, `"pattern"`, and `"both"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum SpeculationMode {
    /// No speculation; uses existing synchronous path. This is the default variant.
    #[default]
    Off,
    /// LLM-decoding level: fires tools when streaming partial JSON has all required fields.
    Decoding,
    /// Application-level pattern (PASTE): predicts top-K calls from `SQLite` history.
    Pattern,
    /// Both decoding and pattern speculation active.
    Both,
}
670
/// Pattern-based (PASTE) speculative execution config.
///
/// Controls the SQLite-backed tool sequence learning subsystem. Disabled by default for
/// privacy and performance reasons; opt-in per deployment.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SpeculativePatternConfig {
    /// Enable PASTE pattern learning and prediction. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Minimum observed occurrences before a prediction is issued. Default: `5`.
    #[serde(default = "default_min_observations")]
    pub min_observations: u32,
    /// Exponential decay half-life in days for pattern scoring. Default: `14.0`.
    #[serde(default = "default_half_life_days")]
    pub half_life_days: f64,
    /// LLM provider name (from `[[llm.providers]]`) for optional reranking.
    /// Empty string disables LLM reranking; scoring-only path is used.
    /// Default: empty (reranking disabled).
    #[serde(default)]
    pub rerank_provider: String,
}
691
692impl Default for SpeculativePatternConfig {
693    fn default() -> Self {
694        Self {
695            enabled: false,
696            min_observations: default_min_observations(),
697            half_life_days: default_half_life_days(),
698            rerank_provider: String::new(),
699        }
700    }
701}
702
/// Shell command regex allowlist for speculative execution.
///
/// Only commands matching at least one regex in this list are eligible for speculation.
/// Default: empty (speculation disabled for shell by default).
///
/// The patterns are stored as plain strings; nothing in this type compiles or
/// validates them as regexes.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct SpeculativeAllowlistConfig {
    /// Regexes matched against the full `bash` command string. Empty = no shell speculation.
    #[serde(default)]
    pub shell: Vec<String>,
}
713
/// Top-level configuration for speculative tool execution.
///
/// All settings here are runtime-only: no cargo feature gates this section.
/// The module always compiles; branches are never taken when `mode = "off"`.
///
/// # Examples
///
/// ```toml
/// [tools.speculative]
/// mode = "both"
/// max_in_flight = 4
/// ttl_seconds = 30
///
/// [tools.speculative.pattern]
/// enabled = false
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SpeculativeConfig {
    /// Speculation mode. Default: `off`.
    #[serde(default)]
    pub mode: SpeculationMode,
    /// Maximum concurrent in-flight speculative tasks. Bounded to `[1, 16]`.
    /// Default: `4`.
    ///
    /// NOTE(review): the bound is not enforced by deserialization in this type —
    /// presumably the consumer clamps it; confirm.
    #[serde(default = "default_max_in_flight")]
    pub max_in_flight: usize,
    /// Minimum confidence score `[0, 1]` to dispatch a speculative task.
    /// Default: `0.55`.
    #[serde(default = "default_confidence_threshold")]
    pub confidence_threshold: f32,
    /// Circuit-breaker: disable speculation for 60 s when wasted ms exceeds this per minute.
    /// Default: `100`.
    #[serde(default = "default_max_wasted_per_minute")]
    pub max_wasted_per_minute: u64,
    /// Per-handle wall-clock TTL in seconds before the handle is cancelled.
    /// Default: `30`.
    #[serde(default = "default_ttl_seconds")]
    pub ttl_seconds: u64,
    /// Emit `AuditEntry` for speculative dispatches (with `result: speculative_discarded`).
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub audit: bool,
    /// PASTE pattern learning config.
    #[serde(default)]
    pub pattern: SpeculativePatternConfig,
    /// Per-executor command allowlists.
    #[serde(default)]
    pub allowlist: SpeculativeAllowlistConfig,
}
757
758impl Default for SpeculativeConfig {
759    fn default() -> Self {
760        Self {
761            mode: SpeculationMode::Off,
762            max_in_flight: default_max_in_flight(),
763            confidence_threshold: default_confidence_threshold(),
764            max_wasted_per_minute: default_max_wasted_per_minute(),
765            ttl_seconds: default_ttl_seconds(),
766            audit: true,
767            pattern: SpeculativePatternConfig::default(),
768            allowlist: SpeculativeAllowlistConfig::default(),
769        }
770    }
771}
772
773impl Default for ShellConfig {
774    fn default() -> Self {
775        Self {
776            timeout: default_timeout(),
777            blocked_commands: Vec::new(),
778            allowed_commands: Vec::new(),
779            allowed_paths: Vec::new(),
780            allow_network: true,
781            confirm_patterns: default_confirm_patterns(),
782            env_blocklist: Self::default_env_blocklist(),
783            transactional: false,
784            transaction_scope: Vec::new(),
785            auto_rollback: false,
786            auto_rollback_exit_codes: Vec::new(),
787            snapshot_required: false,
788            max_snapshot_bytes: 0,
789        }
790    }
791}
792
793impl Default for AuditConfig {
794    fn default() -> Self {
795        Self {
796            enabled: true,
797            destination: default_audit_destination(),
798            tool_risk_summary: false,
799        }
800    }
801}
802
/// OAP-style declarative authorization. Rules are merged into `PolicyEnforcer` at startup.
///
/// Precedence: `policy.rules` are evaluated first (first-match-wins), then `authorization.rules`.
/// Use `[tools.policy]` for deny-wins safety rules; use `[tools.authorization]` for
/// capability-based allow/deny rules that layer on top.
///
/// The derived `Default` is disabled with an empty rule list.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct AuthorizationConfig {
    /// Enable OAP authorization checks. When false, `rules` are ignored. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Per-tool authorization rules. Appended after `[tools.policy]` rules at startup.
    /// Default: empty.
    #[serde(default)]
    pub rules: Vec<PolicyRuleConfig>,
}
817
/// Configuration for egress network event logging.
///
/// Controls what outbound HTTP events are emitted to the audit JSONL stream and
/// surfaced in the TUI Security panel. Domain allow/deny policy is NOT duplicated
/// here — it remains solely in [`ScrapeConfig`].
///
/// The container-level `#[serde(default)]` fills any missing key from this
/// struct's `Default` impl, in which every flag is `true`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
#[allow(clippy::struct_excessive_bools)]
pub struct EgressConfig {
    /// Master switch for egress event emission. Default: `true`.
    pub enabled: bool,
    /// Emit [`EgressEvent`](crate::audit::EgressEvent)s for requests blocked by
    /// SSRF/domain/scheme checks. Default: `true`.
    pub log_blocked: bool,
    /// Include `response_bytes` in the JSONL record. Default: `true`.
    pub log_response_bytes: bool,
    /// Show real hostname in `MetricsSnapshot::egress_recent` (TUI). When `false`,
    /// `"***"` is stored instead. JSONL always keeps the real host. Default: `true`.
    pub log_hosts_to_tui: bool,
}
838
839impl Default for EgressConfig {
840    fn default() -> Self {
841        Self {
842            enabled: true,
843            log_blocked: true,
844            log_response_bytes: true,
845            log_hosts_to_tui: true,
846        }
847    }
848}
849
/// Default for `ScrapeConfig::timeout`.
///
/// NOTE(review): the unit is presumably seconds — confirm against the scrape executor.
fn default_scrape_timeout() -> u64 {
    15_u64
}
853
/// Default for `ScrapeConfig::max_body_bytes`: 4 MiB.
fn default_max_body_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    4 * MIB
}
857
/// Configuration for the web scrape tool.
///
/// Every field carries a serde default, so any subset of keys may be omitted.
#[derive(Debug, Deserialize, Serialize)]
pub struct ScrapeConfig {
    /// Request timeout. Default: `15`.
    ///
    /// NOTE(review): the unit is presumably seconds — confirm against the executor.
    #[serde(default = "default_scrape_timeout")]
    pub timeout: u64,
    /// Body size limit in bytes. Default: `4_194_304` (4 MiB).
    ///
    /// NOTE(review): whether larger bodies are truncated or rejected is decided by
    /// the executor, which is outside this file.
    #[serde(default = "default_max_body_bytes")]
    pub max_body_bytes: usize,
    /// Domain allowlist. Empty = all public domains allowed (default, existing behavior).
    /// When non-empty, ONLY URLs whose host matches an entry are permitted (deny-unknown).
    /// Supports exact match (`"docs.rs"`) and wildcard prefix (`"*.rust-lang.org"`).
    /// Wildcard `*` matches a single subdomain segment only.
    ///
    /// Operators SHOULD set an explicit allowlist in production deployments.
    /// Empty allowlist with a non-empty `denied_domains` is a denylist-only configuration
    /// which is NOT a security boundary — an attacker can use any domain not on the list.
    #[serde(default)]
    pub allowed_domains: Vec<String>,
    /// Domain denylist. Always enforced, regardless of allowlist state.
    /// Supports the same pattern syntax as `allowed_domains`.
    /// Default: empty.
    #[serde(default)]
    pub denied_domains: Vec<String>,
}
880
881impl Default for ScrapeConfig {
882    fn default() -> Self {
883        Self {
884            timeout: default_scrape_timeout(),
885            max_body_bytes: default_max_body_bytes(),
886            allowed_domains: Vec::new(),
887            denied_domains: Vec::new(),
888        }
889    }
890}
891
892fn default_sandbox_profile() -> crate::sandbox::SandboxProfile {
893    crate::sandbox::SandboxProfile::Workspace
894}
895
/// Serde default for `SandboxConfig::backend`: the `"auto"` backend hint.
fn default_sandbox_backend() -> String {
    String::from("auto")
}
899
/// OS-level subprocess sandbox configuration (`[tools.sandbox]` TOML section).
///
/// When `enabled = true`, all shell commands are wrapped in an OS-native sandbox:
/// - **macOS**: `sandbox-exec` (Seatbelt) with a generated `TinyScheme` profile.
/// - **Linux** (requires `sandbox` cargo feature): `bwrap` + Landlock + seccomp BPF.
///
/// This sandbox applies **only to subprocess executors** (shell). In-process executors
/// (`WebScrapeExecutor`, `FileExecutor`) are not covered — see `NFR-SB-1`.
///
/// Every field has a serde default, so each key in the section is optional.
///
/// # Examples
///
/// ```toml
/// [tools.sandbox]
/// enabled = true
/// profile = "workspace"
/// allow_read  = ["$HOME/.cache/zeph"]
/// allow_write = ["./.local"]
/// strict = true
/// backend = "auto"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SandboxConfig {
    /// Enable OS-level sandbox. Default: `false`.
    ///
    /// On Linux requires the `sandbox` cargo feature. When `true` but the feature is absent,
    /// startup emits `WARN` and degrades to noop (fail-open). Use `strict = true` to
    /// make the feature absence an error instead.
    ///
    /// Note that `strict` itself defaults to `true`, so the fail-open path described
    /// above requires setting `strict = false` explicitly.
    #[serde(default)]
    pub enabled: bool,

    /// Enforcement profile controlling the baseline restrictions.
    ///
    /// Defaults to `SandboxProfile::Workspace`.
    #[serde(default = "default_sandbox_profile")]
    pub profile: crate::sandbox::SandboxProfile,

    /// Additional paths granted read access. Resolved to absolute paths at startup.
    /// Default: empty.
    #[serde(default)]
    pub allow_read: Vec<std::path::PathBuf>,

    /// Additional paths granted write access. Resolved to absolute paths at startup.
    /// Default: empty.
    #[serde(default)]
    pub allow_write: Vec<std::path::PathBuf>,

    /// When `true`, sandbox initialization failure aborts startup (fail-closed). Default: `true`.
    #[serde(default = "default_true")]
    pub strict: bool,

    /// OS backend hint: `"auto"` / `"seatbelt"` / `"landlock-bwrap"` / `"noop"`.
    ///
    /// `"auto"` selects the best available backend for the current platform.
    /// Default: `"auto"`.
    ///
    /// The field is a plain `String`; deserialization accepts any value, so
    /// validation (if any) happens outside this struct.
    #[serde(default = "default_sandbox_backend")]
    pub backend: String,
}
952
953impl Default for SandboxConfig {
954    fn default() -> Self {
955        Self {
956            enabled: false,
957            profile: default_sandbox_profile(),
958            allow_read: Vec::new(),
959            allow_write: Vec::new(),
960            strict: true,
961            backend: default_sandbox_backend(),
962        }
963    }
964}
965
/// Unit tests for the tools configuration types: defaults, TOML deserialization,
/// and derivation of the permission policy from configuration.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn deserialize_default_config() {
        let toml_str = r#"
            enabled = true

            [shell]
            timeout = 60
            blocked_commands = ["rm -rf /", "sudo"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 60);
        assert_eq!(config.shell.blocked_commands.len(), 2);
        assert_eq!(config.shell.blocked_commands[0], "rm -rf /");
        assert_eq!(config.shell.blocked_commands[1], "sudo");
    }

    #[test]
    fn empty_blocked_commands() {
        let toml_str = r"
            [shell]
            timeout = 30
        ";

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
    }

    #[test]
    fn default_tools_config() {
        let config = ToolsConfig::default();
        assert!(config.enabled);
        assert!(config.summarize_output);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
        assert!(config.audit.enabled);
    }

    #[test]
    fn tools_summarize_output_default_true() {
        let config = ToolsConfig::default();
        assert!(config.summarize_output);
    }

    #[test]
    fn tools_summarize_output_parsing() {
        // The default is `true`, so parsing `true` alone cannot prove the key is
        // read. Parse the non-default value first, then the default one.
        let toml_str = r"
            summarize_output = false
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(!config.summarize_output);

        let toml_str = r"
            summarize_output = true
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.summarize_output);
    }

    #[test]
    fn default_shell_config() {
        let config = ShellConfig::default();
        assert_eq!(config.timeout, 30);
        assert!(config.blocked_commands.is_empty());
        assert!(config.allowed_paths.is_empty());
        assert!(config.allow_network);
        assert!(!config.confirm_patterns.is_empty());
    }

    #[test]
    fn deserialize_omitted_fields_use_defaults() {
        let toml_str = "";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.shell.timeout, 30);
        assert!(config.shell.blocked_commands.is_empty());
        assert!(config.shell.allow_network);
        assert!(!config.shell.confirm_patterns.is_empty());
        assert_eq!(config.scrape.timeout, 15);
        assert_eq!(config.scrape.max_body_bytes, 4_194_304);
        assert!(config.audit.enabled);
        assert_eq!(config.audit.destination, "stdout");
        assert!(config.summarize_output);
    }

    #[test]
    fn default_scrape_config() {
        let config = ScrapeConfig::default();
        assert_eq!(config.timeout, 15);
        assert_eq!(config.max_body_bytes, 4_194_304);
    }

    #[test]
    fn deserialize_scrape_config() {
        let toml_str = r"
            [scrape]
            timeout = 30
            max_body_bytes = 2097152
        ";

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.scrape.timeout, 30);
        assert_eq!(config.scrape.max_body_bytes, 2_097_152);
    }

    #[test]
    fn tools_config_default_includes_scrape() {
        let config = ToolsConfig::default();
        assert_eq!(config.scrape.timeout, 15);
        assert_eq!(config.scrape.max_body_bytes, 4_194_304);
    }

    #[test]
    fn deserialize_allowed_commands() {
        let toml_str = r#"
            [shell]
            timeout = 30
            allowed_commands = ["curl", "wget"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.shell.allowed_commands, vec!["curl", "wget"]);
    }

    #[test]
    fn default_allowed_commands_empty() {
        let config = ShellConfig::default();
        assert!(config.allowed_commands.is_empty());
    }

    #[test]
    fn deserialize_shell_security_fields() {
        let toml_str = r#"
            [shell]
            allowed_paths = ["/tmp", "/home/user"]
            allow_network = false
            confirm_patterns = ["rm ", "drop table"]
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.shell.allowed_paths, vec!["/tmp", "/home/user"]);
        assert!(!config.shell.allow_network);
        assert_eq!(config.shell.confirm_patterns, vec!["rm ", "drop table"]);
    }

    #[test]
    fn deserialize_audit_config() {
        let toml_str = r#"
            [audit]
            enabled = true
            destination = "/var/log/zeph-audit.log"
        "#;

        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(config.audit.enabled);
        assert_eq!(config.audit.destination, "/var/log/zeph-audit.log");
    }

    #[test]
    fn default_audit_config() {
        let config = AuditConfig::default();
        assert!(config.enabled);
        assert_eq!(config.destination, "stdout");
    }

    #[test]
    fn permission_policy_from_legacy_fields() {
        let config = ToolsConfig {
            shell: ShellConfig {
                blocked_commands: vec!["sudo".to_owned()],
                confirm_patterns: vec!["rm ".to_owned()],
                ..ShellConfig::default()
            },
            ..ToolsConfig::default()
        };
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        assert_eq!(
            policy.check("bash", "sudo apt"),
            crate::permissions::PermissionAction::Deny
        );
        assert_eq!(
            policy.check("bash", "rm file"),
            crate::permissions::PermissionAction::Ask
        );
    }

    #[test]
    fn permission_policy_from_explicit_config() {
        let toml_str = r#"
            [permissions]
            [[permissions.bash]]
            pattern = "*sudo*"
            action = "deny"
        "#;
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        assert_eq!(
            policy.check("bash", "sudo rm"),
            crate::permissions::PermissionAction::Deny
        );
    }

    #[test]
    fn permission_policy_default_uses_legacy() {
        let config = ToolsConfig::default();
        assert!(config.permissions.is_none());
        let policy = config.permission_policy(AutonomyLevel::Supervised);
        // Default ShellConfig has confirm_patterns, so legacy rules are generated
        assert!(!config.shell.confirm_patterns.is_empty());
        assert!(policy.rules().contains_key("bash"));
    }

    #[test]
    fn deserialize_overflow_config_full() {
        let toml_str = r"
            [overflow]
            threshold = 100000
            retention_days = 14
            max_overflow_bytes = 5242880
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 100_000);
        assert_eq!(config.overflow.retention_days, 14);
        assert_eq!(config.overflow.max_overflow_bytes, 5_242_880);
    }

    #[test]
    fn deserialize_overflow_config_unknown_dir_field_is_ignored() {
        // Old configs with `dir = "..."` must not fail deserialization.
        let toml_str = r#"
            [overflow]
            threshold = 75000
            dir = "/tmp/overflow"
        "#;
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 75_000);
    }

    #[test]
    fn deserialize_overflow_config_partial_uses_defaults() {
        let toml_str = r"
            [overflow]
            threshold = 75000
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.overflow.threshold, 75_000);
        assert_eq!(config.overflow.retention_days, 7);
    }

    #[test]
    fn deserialize_overflow_config_omitted_uses_defaults() {
        let config: ToolsConfig = toml::from_str("").unwrap();
        assert_eq!(config.overflow.threshold, 50_000);
        assert_eq!(config.overflow.retention_days, 7);
        assert_eq!(config.overflow.max_overflow_bytes, 10 * 1024 * 1024);
    }

    #[test]
    fn result_cache_config_defaults() {
        let config = ResultCacheConfig::default();
        assert!(config.enabled);
        assert_eq!(config.ttl_secs, 300);
    }

    #[test]
    fn deserialize_result_cache_config() {
        let toml_str = r"
            [result_cache]
            enabled = false
            ttl_secs = 60
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert!(!config.result_cache.enabled);
        assert_eq!(config.result_cache.ttl_secs, 60);
    }

    #[test]
    fn result_cache_omitted_uses_defaults() {
        let config: ToolsConfig = toml::from_str("").unwrap();
        assert!(config.result_cache.enabled);
        assert_eq!(config.result_cache.ttl_secs, 300);
    }

    #[test]
    fn result_cache_ttl_zero_is_valid() {
        let toml_str = r"
            [result_cache]
            ttl_secs = 0
        ";
        let config: ToolsConfig = toml::from_str(toml_str).unwrap();
        assert_eq!(config.result_cache.ttl_secs, 0);
    }

    #[test]
    fn adversarial_policy_default_exempt_tools_contains_skill_ops() {
        let exempt = AdversarialPolicyConfig::default_exempt_tools();
        assert!(
            exempt.contains(&"load_skill".to_string()),
            "default exempt_tools must contain load_skill"
        );
        assert!(
            exempt.contains(&"invoke_skill".to_string()),
            "default exempt_tools must contain invoke_skill"
        );
    }

    #[test]
    fn utility_scoring_default_exempt_tools_contains_skill_ops() {
        let cfg = UtilityScoringConfig::default();
        assert!(
            cfg.exempt_tools.contains(&"invoke_skill".to_string()),
            "UtilityScoringConfig default exempt_tools must contain invoke_skill"
        );
        assert!(
            cfg.exempt_tools.contains(&"load_skill".to_string()),
            "UtilityScoringConfig default exempt_tools must contain load_skill"
        );
    }
}
zeph_tools/config.rs

zeph_tools/
config.rs