vellaveto_engine/
lib.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Policy evaluation engine for the Vellaveto MCP tool firewall.
9//!
10//! Evaluates [`Action`](vellaveto_types::core::Action) requests against
11//! configured [`Policy`](vellaveto_types::core::Policy) rules and returns a
12//! [`Verdict`](vellaveto_types::core::Verdict) (Allow, Deny, or RequireApproval).
13//! Supports glob/regex path matching, domain/IP rules, ABAC attribute constraints,
14//! call-chain validation, decision caching (LRU+TTL), and Wasm policy plugins.
15//!
16//! The engine is synchronous by design — all evaluation completes in <5ms P99.
17
18pub mod abac;
19pub mod adaptive_rate;
20pub mod behavioral;
21pub mod cache;
22pub mod cascading;
23pub mod circuit_breaker;
24pub mod collusion;
25mod compiled;
26mod constraint_eval;
27mod context_check;
28pub mod coverage;
29pub mod deputy;
30mod domain;
31mod entropy_gate;
32mod error;
33pub mod impact;
34mod ip;
35pub mod least_agency;
36mod legacy;
37pub mod lint;
38mod matcher;
39mod normalize;
40mod path;
41mod policy_compile;
42mod rule_check;
43mod traced;
44pub mod verified_constraint_eval;
45pub mod verified_core;
46mod verified_entropy_gate;
47pub mod wasm_plugin;
48
49#[cfg(kani)]
50mod kani_proofs;
51
52pub use compiled::{
53    CompiledConstraint, CompiledContextCondition, CompiledIpRules, CompiledNetworkRules,
54    CompiledPathRules, CompiledPolicy,
55};
56pub use error::{EngineError, PolicyValidationError};
57pub use matcher::{CompiledToolMatcher, PatternMatcher};
58pub use path::DEFAULT_MAX_PATH_DECODE_ITERATIONS;
59
60use vellaveto_types::{
61    Action, ActionSummary, EvaluationContext, EvaluationTrace, Policy, PolicyType, Verdict,
62};
63
64use globset::{Glob, GlobMatcher};
65use regex::Regex;
66use std::collections::HashMap;
67use std::sync::RwLock;
68
/// Maximum number of compiled glob matchers kept in the legacy runtime cache.
///
/// The cache this bounds is the `glob_matcher_cache` field of `PolicyEngine`,
/// which that field's documentation describes as serving `glob_is_match` on
/// the non-precompiled path.
// NOTE(review): the code that enforces this cap lives outside this part of
// the file — confirm the eviction strategy there before changing the value.
const MAX_GLOB_MATCHER_CACHE_ENTRIES: usize = 2048;
/// Maximum number of domain normalization results kept in the runtime cache.
///
/// Currently the cache starts empty and is not actively populated by
/// evaluation paths (domain normalization is done inline).  The constant is
/// retained as the documented eviction cap for the `domain_norm_cache`
/// field so that any future population path has a bound ready.
// `dead_code` is allowed deliberately: nothing reads the constant until a
// caching path is added.
#[allow(dead_code)]
const MAX_DOMAIN_NORM_CACHE_ENTRIES: usize = 4096;
79
/// The core policy evaluation engine.
///
/// Evaluates [`Action`]s against a set of [`Policy`] rules to produce a [`Verdict`].
///
/// # Security Model
///
/// - **Fail-closed**: An empty policy set produces `Verdict::Deny`.
/// - **Priority ordering**: Higher-priority policies are evaluated first.
/// - **Pattern matching**: Policy IDs use `"tool:function"` convention with wildcard support.
pub struct PolicyEngine {
    /// When true, the engine applies stricter validation on conditions and
    /// parameters. Fixed at construction time.
    strict_mode: bool,
    /// Pre-compiled policies. Empty for engines built with `new`; populated by
    /// `with_policies`, which documents them as sorted by priority (highest
    /// first, deny-overrides). When non-empty, `evaluate_action` evaluates
    /// these and ignores the caller-supplied policy slice.
    compiled_policies: Vec<CompiledPolicy>,
    /// Maps exact tool names to sorted indices in `compiled_policies`.
    /// Only policies with an exact tool name pattern are indexed here.
    tool_index: HashMap<String, Vec<usize>>,
    /// Indices of policies that cannot be indexed by tool name
    /// (Universal, prefix, suffix, or Any tool patterns).
    /// Already sorted by position in `compiled_policies` (= priority order).
    always_check: Vec<usize>,
    /// When false (default), time-window context conditions always use wall-clock
    /// time. When true, the engine honors `EvaluationContext.timestamp` from the
    /// caller. **Only enable for deterministic testing** — in production, a client
    /// could supply a fake timestamp to bypass time-window policies.
    trust_context_timestamps: bool,
    /// Maximum percent-decoding iterations in `normalize_path` before
    /// fail-closing to `"/"`. Defaults to [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20).
    max_path_decode_iterations: u32,
    /// Legacy runtime cache for glob matcher compilation.
    ///
    /// This cache is used by `glob_is_match` on the non-precompiled path.
    /// Both constructors pre-size it for 256 entries.
    glob_matcher_cache: RwLock<HashMap<String, GlobMatcher>>,
    /// Runtime cache for domain normalization results.
    ///
    /// Caches both successful normalization (Some) and invalid domains (None)
    /// to avoid repeated IDNA parsing on hot network/domain constraint paths.
    ///
    /// SECURITY (FIND-R46-003): Bounded to [`MAX_DOMAIN_NORM_CACHE_ENTRIES`].
    /// When capacity is exceeded, the cache is cleared to prevent unbounded
    /// memory growth from attacker-controlled domain strings. Currently this
    /// cache is not actively populated — domain normalization is done inline
    /// via [`domain::normalize_domain_for_match`]. The eviction guard exists
    /// as a defense-in-depth measure for future caching additions.
    domain_norm_cache: RwLock<HashMap<String, Option<String>>>,
    /// Optional topology guard for pre-policy tool call filtering.
    /// When set, tool calls are checked against the live topology graph
    /// before policy evaluation. Unknown tools may be denied or trigger
    /// a re-crawl depending on configuration.
    ///
    /// Only available when the `discovery` feature is enabled.
    #[cfg(feature = "discovery")]
    topology_guard: Option<std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>>,
}
132
133impl std::fmt::Debug for PolicyEngine {
134    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135        let mut s = f.debug_struct("PolicyEngine");
136        s.field("strict_mode", &self.strict_mode)
137            .field("compiled_policies_count", &self.compiled_policies.len())
138            .field("indexed_tools", &self.tool_index.len())
139            .field("always_check_count", &self.always_check.len())
140            .field(
141                "max_path_decode_iterations",
142                &self.max_path_decode_iterations,
143            )
144            .field(
145                "glob_matcher_cache_size",
146                &self
147                    .glob_matcher_cache
148                    .read()
149                    .map(|c| c.len())
150                    .unwrap_or_default(),
151            )
152            .field(
153                "domain_norm_cache_size",
154                &self
155                    .domain_norm_cache
156                    .read()
157                    .map(|c| c.len())
158                    .unwrap_or_default(),
159            );
160        #[cfg(feature = "discovery")]
161        {
162            s.field("topology_guard", &self.topology_guard.is_some());
163        }
164        s.finish()
165    }
166}
167
168impl PolicyEngine {
169    /// Create a new policy engine.
170    ///
171    /// When `strict_mode` is true, the engine applies stricter validation
172    /// on conditions and parameters.
173    pub fn new(strict_mode: bool) -> Self {
174        Self {
175            strict_mode,
176            compiled_policies: Vec::new(),
177            tool_index: HashMap::new(),
178            always_check: Vec::new(),
179            trust_context_timestamps: false,
180            max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
181            glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
182            // IMP-R208-001: Zero initial capacity — cache not actively populated.
183            domain_norm_cache: RwLock::new(HashMap::new()),
184            #[cfg(feature = "discovery")]
185            topology_guard: None,
186        }
187    }
188
    /// Returns the engine's strict_mode setting.
    ///
    /// This is the value passed to [`Self::new`] or [`Self::with_policies`]
    /// at construction.
    pub fn strict_mode(&self) -> bool {
        self.strict_mode
    }
193
    /// Validate a domain pattern used in network_rules.
    ///
    /// Rules per RFC 1035:
    /// - Labels (parts between dots) must be 1-63 characters each
    /// - Each label must be alphanumeric + hyphen only (no leading/trailing hyphen)
    /// - Total domain length max 253 characters
    /// - Wildcard `*.` prefix is allowed (only at the beginning)
    /// - Empty string is rejected
    ///
    /// This is a public entry point that forwards unchanged to the internal
    /// `domain::validate_domain_pattern` function; see that function for details.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a `String` message when the internal validator
    /// rejects `pattern`.
    pub fn validate_domain_pattern(pattern: &str) -> Result<(), String> {
        domain::validate_domain_pattern(pattern)
    }
207
208    /// Create a new policy engine with pre-compiled policies.
209    ///
210    /// All regex and glob patterns are compiled at construction time.
211    /// Invalid patterns cause immediate rejection with descriptive errors.
212    /// The compiled policies are sorted by priority (highest first, deny-overrides).
213    pub fn with_policies(
214        strict_mode: bool,
215        policies: &[Policy],
216    ) -> Result<Self, Vec<PolicyValidationError>> {
217        let compiled = Self::compile_policies(policies, strict_mode)?;
218        let (tool_index, always_check) = Self::build_tool_index(&compiled);
219        Ok(Self {
220            strict_mode,
221            compiled_policies: compiled,
222            tool_index,
223            always_check,
224            trust_context_timestamps: false,
225            max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
226            glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
227            // IMP-R208-001: Zero initial capacity — cache not actively populated.
228            domain_norm_cache: RwLock::new(HashMap::new()),
229            #[cfg(feature = "discovery")]
230            topology_guard: None,
231        })
232    }
233
    /// Enable trusting `EvaluationContext.timestamp` for time-window checks.
    ///
    /// Pass `true` to honor the caller-supplied timestamp, `false` to return
    /// to wall-clock time (the default set by both constructors).
    ///
    /// **WARNING:** Only use for deterministic testing. In production, a client
    /// can supply a fake timestamp to bypass time-window policies.
    // Compiled only under `cfg(test)`, so non-test builds cannot flip the flag
    // through this setter.
    #[cfg(test)]
    pub fn set_trust_context_timestamps(&mut self, trust: bool) {
        self.trust_context_timestamps = trust;
    }
242
    /// Set the topology guard for pre-policy tool call filtering.
    ///
    /// When set, `evaluate_action` checks the tool against the topology graph
    /// before policy evaluation. Unknown tools produce `Verdict::Deny` with a
    /// topology-specific reason, unless the guard returns `Bypassed`.
    ///
    /// Any previously installed guard is replaced. Only available when the
    /// `discovery` feature is enabled.
    #[cfg(feature = "discovery")]
    pub fn set_topology_guard(
        &mut self,
        guard: std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>,
    ) {
        self.topology_guard = Some(guard);
    }
255
256    /// Check the topology guard (if set) before policy evaluation.
257    ///
258    /// Returns `Some(Verdict::Deny)` if the tool is unknown or ambiguous
259    /// and the guard is configured to block. Returns `None` to proceed
260    /// with normal policy evaluation.
261    #[cfg(feature = "discovery")]
262    fn check_topology(&self, action: &Action) -> Option<Verdict> {
263        let guard = self.topology_guard.as_ref()?;
264        let tool_name = &action.tool;
265        match guard.check(tool_name) {
266            vellaveto_discovery::guard::TopologyVerdict::Known { .. } => None,
267            vellaveto_discovery::guard::TopologyVerdict::Bypassed => None,
268            vellaveto_discovery::guard::TopologyVerdict::Unknown { suggestion, .. } => {
269                let reason = if let Some(closest) = suggestion {
270                    format!(
271                        "Tool '{tool_name}' not found in topology graph (did you mean '{closest}'?)"
272                    )
273                } else {
274                    format!("Tool '{tool_name}' not found in topology graph")
275                };
276                Some(Verdict::Deny { reason })
277            }
278            vellaveto_discovery::guard::TopologyVerdict::Ambiguous { matches, .. } => {
279                Some(Verdict::Deny {
280                    reason: format!(
281                        "Tool '{}' is ambiguous — matches servers: {}. Use qualified name (server::tool).",
282                        tool_name,
283                        matches.join(", ")
284                    ),
285                })
286            }
287        }
288    }
289
    /// Set the maximum percent-decoding iterations for path normalization.
    ///
    /// Paths requiring more iterations fail-closed to `"/"`. The default is
    /// [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20). A value of 0 disables
    /// iterative decoding entirely (single pass only).
    ///
    /// The new limit is stored on the engine and replaces the previous value.
    // NOTE(review): the behavior for 0 is implemented by the path normalizer,
    // which is outside this part of the file — confirm there before relying on it.
    pub fn set_max_path_decode_iterations(&mut self, max: u32) {
        self.max_path_decode_iterations = max;
    }
298
299    /// Build a tool-name index for O(matching) evaluation.
300    fn build_tool_index(compiled: &[CompiledPolicy]) -> (HashMap<String, Vec<usize>>, Vec<usize>) {
301        let mut index: HashMap<String, Vec<usize>> = HashMap::with_capacity(compiled.len());
302        let mut always_check = Vec::with_capacity(compiled.len());
303        for (i, cp) in compiled.iter().enumerate() {
304            match &cp.tool_matcher {
305                CompiledToolMatcher::Universal => always_check.push(i),
306                CompiledToolMatcher::ToolOnly(PatternMatcher::Exact(name)) => {
307                    index.entry(name.clone()).or_default().push(i);
308                }
309                CompiledToolMatcher::ToolAndFunction(PatternMatcher::Exact(name), _) => {
310                    index.entry(name.clone()).or_default().push(i);
311                }
312                _ => always_check.push(i),
313            }
314        }
315        // SECURITY (FIND-R49-003): Assert sorted invariant in debug builds.
316        // The always_check list must be sorted by index for deterministic evaluation order.
317        // Tool index values must also be sorted per-key for the same reason.
318        debug_assert!(
319            always_check.windows(2).all(|w| w[0] < w[1]),
320            "always_check must be sorted"
321        );
322        debug_assert!(
323            index.values().all(|v| v.windows(2).all(|w| w[0] < w[1])),
324            "tool_index values must be sorted"
325        );
326        (index, always_check)
327    }
328
329    /// Sort policies by priority (highest first), with deny-overrides at equal priority,
330    /// and a stable tertiary tiebreaker by policy ID for deterministic ordering.
331    ///
332    /// Call this once when loading or modifying policies, then pass the sorted
333    /// slice to [`Self::evaluate_action`] to avoid re-sorting on every evaluation.
334    pub fn sort_policies(policies: &mut [Policy]) {
335        policies.sort_by(|a, b| {
336            let pri = b.priority.cmp(&a.priority);
337            if pri != std::cmp::Ordering::Equal {
338                return pri;
339            }
340            let a_deny = matches!(a.policy_type, PolicyType::Deny);
341            let b_deny = matches!(b.policy_type, PolicyType::Deny);
342            let deny_ord = b_deny.cmp(&a_deny);
343            if deny_ord != std::cmp::Ordering::Equal {
344                return deny_ord;
345            }
346            // Tertiary tiebreaker: lexicographic by ID for deterministic ordering
347            a.id.cmp(&b.id)
348        });
349    }
350
351    // VERIFIED [S1]: Deny-by-default — empty policy set produces Deny (MCPPolicyEngine.tla S1)
352    // VERIFIED [S2]: Priority ordering — higher priority wins (MCPPolicyEngine.tla S2)
353    // VERIFIED [S3]: Deny-overrides — Deny beats Allow at same priority (MCPPolicyEngine.tla S3)
354    // VERIFIED [S5]: Errors produce Deny — every Allow verdict has a matching Allow policy (MCPPolicyEngine.tla S5)
355    // VERIFIED [L1]: Progress — every action gets a verdict (MCPPolicyEngine.tla L1)
356    /// Evaluate an action against a set of policies.
357    ///
358    /// For best performance, pass policies that have been pre-sorted with
359    /// [`Self::sort_policies`]. If not pre-sorted, this method will sort a temporary
360    /// copy (which adds O(n log n) overhead per call).
361    ///
362    /// The first matching policy determines the verdict.
363    /// If no policy matches, the default is Deny (fail-closed).
364    #[must_use = "security verdicts must not be discarded"]
365    pub fn evaluate_action(
366        &self,
367        action: &Action,
368        policies: &[Policy],
369    ) -> Result<Verdict, EngineError> {
370        // Topology pre-filter: check if the tool exists in the topology graph.
371        // Unknown/ambiguous tools are denied before policy evaluation.
372        #[cfg(feature = "discovery")]
373        if let Some(deny) = self.check_topology(action) {
374            return Ok(deny);
375        }
376
377        // Fast path: use pre-compiled policies (zero Mutex, zero runtime compilation)
378        if !self.compiled_policies.is_empty() {
379            return self.evaluate_with_compiled(action);
380        }
381
382        // Legacy path: evaluate ad-hoc policies (compiles patterns on the fly)
383        if policies.is_empty() {
384            return Ok(Verdict::Deny {
385                reason: "No policies defined".to_string(),
386            });
387        }
388
389        // Check if already sorted (by priority desc, deny-first at equal priority,
390        // then by ID ascending as a tiebreaker — FIND-R44-057)
391        let is_sorted = policies.windows(2).all(|w| {
392            let pri = w[0].priority.cmp(&w[1].priority);
393            if pri == std::cmp::Ordering::Equal {
394                let a_deny = matches!(w[0].policy_type, PolicyType::Deny);
395                let b_deny = matches!(w[1].policy_type, PolicyType::Deny);
396                if a_deny == b_deny {
397                    // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
398                    w[0].id.cmp(&w[1].id) != std::cmp::Ordering::Greater
399                } else {
400                    b_deny <= a_deny
401                }
402            } else {
403                pri != std::cmp::Ordering::Less
404            }
405        });
406
407        if is_sorted {
408            for policy in policies {
409                if self.matches_action(action, policy) {
410                    if let Some(verdict) = self.apply_policy(action, policy)? {
411                        return Ok(verdict);
412                    }
413                    // None: on_no_match="continue", try next policy
414                }
415            }
416        } else {
417            let mut sorted: Vec<&Policy> = policies.iter().collect();
418            sorted.sort_by(|a, b| {
419                let pri = b.priority.cmp(&a.priority);
420                if pri != std::cmp::Ordering::Equal {
421                    return pri;
422                }
423                let a_deny = matches!(a.policy_type, PolicyType::Deny);
424                let b_deny = matches!(b.policy_type, PolicyType::Deny);
425                let deny_cmp = b_deny.cmp(&a_deny);
426                if deny_cmp != std::cmp::Ordering::Equal {
427                    return deny_cmp;
428                }
429                // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
430                a.id.cmp(&b.id)
431            });
432            for policy in &sorted {
433                if self.matches_action(action, policy) {
434                    if let Some(verdict) = self.apply_policy(action, policy)? {
435                        return Ok(verdict);
436                    }
437                    // None: on_no_match="continue", try next policy
438                }
439            }
440        }
441
442        Ok(Verdict::Deny {
443            reason: "No matching policy".to_string(),
444        })
445    }
446
447    /// Evaluate an action with optional session context.
448    ///
449    /// This is the context-aware counterpart to [`Self::evaluate_action`].
450    /// When `context` is `Some`, context conditions (time windows, call limits,
451    /// agent identity, action history) are evaluated. When `None`, behaves
452    /// identically to `evaluate_action`.
453    ///
454    /// # WARNING: `policies` parameter ignored when compiled policies exist
455    ///
456    /// When the engine was constructed with [`Self::with_policies`] (or any
457    /// builder that populates `compiled_policies`), the `policies` parameter
458    /// is **completely ignored**. The engine uses its pre-compiled policy set
459    /// instead.
460    #[deprecated(
461        since = "4.0.1",
462        note = "policies parameter is silently ignored when compiled policies exist. \
463                Use evaluate_action() for compiled engines or build a new engine \
464                with with_policies() for dynamic policy sets."
465    )]
466    #[must_use = "security verdicts must not be discarded"]
467    pub fn evaluate_action_with_context(
468        &self,
469        action: &Action,
470        policies: &[Policy],
471        context: Option<&EvaluationContext>,
472    ) -> Result<Verdict, EngineError> {
473        #[cfg(feature = "discovery")]
474        if let Some(deny) = self.check_topology(action) {
475            return Ok(deny);
476        }
477        if let Some(ctx) = context {
478            if let Err(reason) = ctx.validate() {
479                return Ok(Verdict::Deny { reason });
480            }
481        }
482        if context.is_none() {
483            return self.evaluate_action(action, policies);
484        }
485        if !self.compiled_policies.is_empty() {
486            return self.evaluate_with_compiled_ctx(action, context);
487        }
488        if let Some(ctx) = context {
489            if ctx.has_any_meaningful_fields() {
490                return Ok(Verdict::Deny {
491                    reason: "Policy engine has no compiled policies; \
492                             context conditions cannot be evaluated (fail-closed)"
493                        .to_string(),
494                });
495            }
496        }
497        self.evaluate_action(action, policies)
498    }
499
500    /// Evaluate an action with optional session context, returning only the verdict.
501    ///
502    /// This is the context-aware counterpart to [`Self::evaluate_action`].
503    /// When `context` is `Some`, context conditions (time windows, call limits,
504    /// agent identity, action history) are evaluated. When `None`, behaves
505    /// identically to `evaluate_action`.
506    ///
507    /// For the full decision trace, use [`Self::evaluate_action_traced_with_context`].
508    #[must_use = "security verdicts must not be discarded"]
509    pub fn evaluate_with_context(
510        &self,
511        action: &Action,
512        context: Option<&EvaluationContext>,
513    ) -> Result<Verdict, EngineError> {
514        self.evaluate_action_traced_with_context(action, context)
515            .map(|(verdict, _trace)| verdict)
516    }
517
518    /// Evaluate an action with full decision trace and optional session context.
519    #[must_use = "security verdicts must not be discarded"]
520    pub fn evaluate_action_traced_with_context(
521        &self,
522        action: &Action,
523        context: Option<&EvaluationContext>,
524    ) -> Result<(Verdict, EvaluationTrace), EngineError> {
525        // Topology pre-filter: check if the tool exists in the topology graph.
526        #[cfg(feature = "discovery")]
527        if let Some(deny) = self.check_topology(action) {
528            let param_keys: Vec<String> = action
529                .parameters
530                .as_object()
531                .map(|o| o.keys().cloned().collect::<Vec<String>>())
532                .unwrap_or_default();
533            let trace = EvaluationTrace {
534                action_summary: ActionSummary {
535                    tool: action.tool.clone(),
536                    function: action.function.clone(),
537                    param_count: param_keys.len(),
538                    param_keys,
539                },
540                policies_checked: 0,
541                policies_matched: 0,
542                matches: vec![],
543                verdict: deny.clone(),
544                duration_us: 0,
545            };
546            return Ok((deny, trace));
547        }
548
549        // SECURITY (FIND-R50-063): Validate context bounds before evaluation.
550        if let Some(ctx) = context {
551            if let Err(reason) = ctx.validate() {
552                let deny = Verdict::Deny {
553                    reason: reason.clone(),
554                };
555                let param_keys: Vec<String> = action
556                    .parameters
557                    .as_object()
558                    .map(|o| o.keys().cloned().collect::<Vec<String>>())
559                    .unwrap_or_default();
560                let trace = EvaluationTrace {
561                    action_summary: ActionSummary {
562                        tool: action.tool.clone(),
563                        function: action.function.clone(),
564                        param_count: param_keys.len(),
565                        param_keys,
566                    },
567                    policies_checked: 0,
568                    policies_matched: 0,
569                    matches: vec![],
570                    verdict: deny.clone(),
571                    duration_us: 0,
572                };
573                return Ok((deny, trace));
574            }
575        }
576        if context.is_none() {
577            return self.evaluate_action_traced(action);
578        }
579        // Traced context-aware path
580        self.evaluate_action_traced_ctx(action, context)
581    }
582
583    // ═══════════════════════════════════════════════════
584    // COMPILED EVALUATION PATH (zero Mutex, zero runtime compilation)
585    // ═══════════════════════════════════════════════════
586
587    /// Evaluate an action using pre-compiled policies. Zero Mutex acquisitions.
588    /// Compiled policies are already sorted at compile time.
589    ///
590    /// Uses the tool-name index when available: only checks policies whose tool
591    /// pattern could match `action.tool`, plus `always_check` (wildcard/prefix/suffix).
592    /// Falls back to linear scan when no index has been built.
593    fn evaluate_with_compiled(&self, action: &Action) -> Result<Verdict, EngineError> {
594        // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
595        // the full pipeline (NFKC + lowercase + homoglyph) before policy matching.
596        // This prevents fullwidth Unicode, circled letters (Ⓐ), and mathematical
597        // variants from bypassing exact-match Deny policies. Patterns are also
598        // normalized via normalize_full at compile time for consistency.
599        let norm_tool = crate::normalize::normalize_full(&action.tool);
600        let norm_func = crate::normalize::normalize_full(&action.function);
601
602        // If index was built, use it for O(matching) instead of O(all)
603        if !self.tool_index.is_empty() || !self.always_check.is_empty() {
604            let tool_specific = self.tool_index.get(&norm_tool);
605            let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
606            let always_slice = &self.always_check;
607
608            // Merge two sorted index slices, iterating in priority order.
609            // SECURITY (R26-ENG-1): When both slices reference the same policy index,
610            // increment BOTH pointers to avoid evaluating the policy twice.
611            let mut ti = 0;
612            let mut ai = 0;
613            loop {
614                let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
615                    (Some(&t), Some(&a)) => {
616                        if t < a {
617                            ti += 1;
618                            t
619                        } else if t > a {
620                            ai += 1;
621                            a
622                        } else {
623                            // t == a: same policy in both slices, skip duplicate
624                            ti += 1;
625                            ai += 1;
626                            t
627                        }
628                    }
629                    (Some(&t), None) => {
630                        ti += 1;
631                        t
632                    }
633                    (None, Some(&a)) => {
634                        ai += 1;
635                        a
636                    }
637                    (None, None) => break,
638                };
639
640                let cp = &self.compiled_policies[next_idx];
641                if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
642                    if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
643                        return Ok(verdict);
644                    }
645                    // None: on_no_match="continue", try next policy
646                }
647            }
648        } else {
649            // No index: linear scan (legacy compiled path)
650            for cp in &self.compiled_policies {
651                if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
652                    if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
653                        return Ok(verdict);
654                    }
655                    // None: on_no_match="continue", try next policy
656                }
657            }
658        }
659
660        Ok(Verdict::Deny {
661            reason: "No matching policy".to_string(),
662        })
663    }
664
665    /// Evaluate with compiled policies and session context.
666    fn evaluate_with_compiled_ctx(
667        &self,
668        action: &Action,
669        context: Option<&EvaluationContext>,
670    ) -> Result<Verdict, EngineError> {
671        // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
672        // the full pipeline (same as evaluate_with_compiled).
673        let norm_tool = crate::normalize::normalize_full(&action.tool);
674        let norm_func = crate::normalize::normalize_full(&action.function);
675
676        if !self.tool_index.is_empty() || !self.always_check.is_empty() {
677            let tool_specific = self.tool_index.get(&norm_tool);
678            let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
679            let always_slice = &self.always_check;
680
681            // SECURITY (R26-ENG-1): Deduplicate merge — see evaluate_compiled().
682            let mut ti = 0;
683            let mut ai = 0;
684            loop {
685                let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
686                    (Some(&t), Some(&a)) => {
687                        if t < a {
688                            ti += 1;
689                            t
690                        } else if t > a {
691                            ai += 1;
692                            a
693                        } else {
694                            ti += 1;
695                            ai += 1;
696                            t
697                        }
698                    }
699                    (Some(&t), None) => {
700                        ti += 1;
701                        t
702                    }
703                    (None, Some(&a)) => {
704                        ai += 1;
705                        a
706                    }
707                    (None, None) => break,
708                };
709
710                let cp = &self.compiled_policies[next_idx];
711                if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
712                    if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
713                        return Ok(verdict);
714                    }
715                }
716            }
717        } else {
718            for cp in &self.compiled_policies {
719                if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
720                    if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
721                        return Ok(verdict);
722                    }
723                }
724            }
725        }
726
727        Ok(Verdict::Deny {
728            reason: "No matching policy".to_string(),
729        })
730    }
731
732    /// Apply a matched compiled policy to produce a verdict (no context).
733    /// Returns `None` when a Conditional policy with `on_no_match: "continue"` has no
734    /// constraints fire, signaling the evaluation loop to try the next policy.
735    fn apply_compiled_policy(
736        &self,
737        action: &Action,
738        cp: &CompiledPolicy,
739    ) -> Result<Option<Verdict>, EngineError> {
740        self.apply_compiled_policy_ctx(action, cp, None)
741    }
742
743    /// Apply a matched compiled policy with optional context.
744    fn apply_compiled_policy_ctx(
745        &self,
746        action: &Action,
747        cp: &CompiledPolicy,
748        context: Option<&EvaluationContext>,
749    ) -> Result<Option<Verdict>, EngineError> {
750        // Check path rules before policy type dispatch.
751        // Blocked paths → deny immediately regardless of policy type.
752        if let Some(denial) = self.check_path_rules(action, cp) {
753            Self::debug_assert_verified_deny(cp, true, false);
754            return Ok(Some(denial));
755        }
756        // Check network rules before policy type dispatch.
757        if let Some(denial) = self.check_network_rules(action, cp) {
758            Self::debug_assert_verified_deny(cp, true, false);
759            return Ok(Some(denial));
760        }
761        // Check IP rules (DNS rebinding protection) after network rules.
762        if let Some(denial) = self.check_ip_rules(action, cp) {
763            Self::debug_assert_verified_deny(cp, true, false);
764            return Ok(Some(denial));
765        }
766        // Check context conditions (session-level) before policy type dispatch.
767        // SECURITY: If a policy declares context conditions but no context is
768        // provided, deny the action (fail-closed). Skipping would let callers
769        // bypass time-window / max-calls / agent-id restrictions by omitting context.
770        if !cp.context_conditions.is_empty() {
771            match context {
772                Some(ctx) => {
773                    // SECURITY (R231-ENG-3): Normalize tool name before passing to
774                    // context conditions, consistent with policy matching which uses
775                    // normalize_full(). Prevents future context conditions from
776                    // receiving raw attacker-controlled tool names.
777                    let norm_tool = crate::normalize::normalize_full(&action.tool);
778                    if let Some(denial) = self.check_context_conditions(ctx, cp, &norm_tool) {
779                        Self::debug_assert_verified_deny(cp, false, true);
780                        return Ok(Some(denial));
781                    }
782                }
783                None => {
784                    Self::debug_assert_verified_deny(cp, false, true);
785                    return Ok(Some(Verdict::Deny {
786                        reason: format!(
787                            "Policy '{}' requires evaluation context (has {} context condition(s)) but none was provided",
788                            cp.policy.name,
789                            cp.context_conditions.len()
790                        ),
791                    }));
792                }
793            }
794        }
795
796        match &cp.policy.policy_type {
797            PolicyType::Allow => {
798                Self::debug_assert_verified_allow(cp);
799                Ok(Some(Verdict::Allow))
800            }
801            PolicyType::Deny => {
802                Self::debug_assert_verified_policy_deny(cp);
803                Ok(Some(Verdict::Deny {
804                    reason: cp.deny_reason.clone(),
805                }))
806            }
807            PolicyType::Conditional { .. } => self.evaluate_compiled_conditions(action, cp),
808            // Handle future variants - fail closed (deny)
809            _ => {
810                // SECURITY (R239-XCUT-5): Genericize — policy name in debug only.
811                tracing::debug!(policy = %cp.policy.name, "Request denied (unknown policy type)");
812                Ok(Some(Verdict::Deny {
813                    reason: "Request denied (unknown policy type)".to_string(),
814                }))
815            }
816        }
817    }
818
819    /// Debug-assert: verified core confirms rule/context override produces Deny.
820    #[inline]
821    fn debug_assert_verified_deny(cp: &CompiledPolicy, rule_override: bool, ctx_deny: bool) {
822        debug_assert!({
823            let rm = verified_core::ResolvedMatch {
824                matched: true,
825                is_deny: matches!(cp.policy.policy_type, PolicyType::Deny),
826                is_conditional: matches!(cp.policy.policy_type, PolicyType::Conditional { .. }),
827                priority: cp.policy.priority.max(0) as u32,
828                rule_override_deny: rule_override,
829                context_deny: ctx_deny,
830                require_approval: false,
831                condition_fired: false,
832                condition_verdict: verified_core::VerdictKind::Deny,
833                on_no_match_continue: false,
834                all_constraints_skipped: false,
835            };
836            verified_core::compute_single_verdict(&rm)
837                == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
838        });
839    }
840
841    /// Debug-assert: verified core confirms Allow policy produces Allow.
842    #[inline]
843    fn debug_assert_verified_allow(cp: &CompiledPolicy) {
844        debug_assert!({
845            let rm = verified_core::ResolvedMatch {
846                matched: true,
847                is_deny: false,
848                is_conditional: false,
849                priority: cp.policy.priority.max(0) as u32,
850                rule_override_deny: false,
851                context_deny: false,
852                require_approval: false,
853                condition_fired: false,
854                condition_verdict: verified_core::VerdictKind::Allow,
855                on_no_match_continue: false,
856                all_constraints_skipped: false,
857            };
858            verified_core::compute_single_verdict(&rm)
859                == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Allow)
860        });
861    }
862
863    /// Debug-assert: verified core confirms Deny policy produces Deny.
864    #[inline]
865    fn debug_assert_verified_policy_deny(cp: &CompiledPolicy) {
866        debug_assert!({
867            let rm = verified_core::ResolvedMatch {
868                matched: true,
869                is_deny: true,
870                is_conditional: false,
871                priority: cp.policy.priority.max(0) as u32,
872                rule_override_deny: false,
873                context_deny: false,
874                require_approval: false,
875                condition_fired: false,
876                condition_verdict: verified_core::VerdictKind::Deny,
877                on_no_match_continue: false,
878                all_constraints_skipped: false,
879            };
880            verified_core::compute_single_verdict(&rm)
881                == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
882        });
883    }
884
885    /// Normalize a file path: resolve `..`, `.`, reject null bytes, ensure deterministic form.
886    ///
887    /// Handles percent-encoding, null bytes, and path traversal attempts.
888    pub fn normalize_path(raw: &str) -> Result<String, EngineError> {
889        path::normalize_path(raw)
890    }
891
892    /// Normalize a file path with a configurable percent-decoding iteration limit.
893    ///
894    /// Use this variant when you need to control the maximum decode iterations
895    /// to prevent DoS from deeply nested percent-encoding.
896    pub fn normalize_path_bounded(raw: &str, max_iterations: u32) -> Result<String, EngineError> {
897        path::normalize_path_bounded(raw, max_iterations)
898    }
899
900    /// Extract the domain from a URL string.
901    ///
902    /// Returns the host portion of the URL, or the original string if parsing fails.
903    pub fn extract_domain(url: &str) -> String {
904        domain::extract_domain(url)
905    }
906
907    /// Match a domain against a pattern like `*.example.com` or `example.com`.
908    ///
909    /// Supports wildcard patterns with `*.` prefix for subdomain matching.
910    pub fn match_domain_pattern(domain_str: &str, pattern: &str) -> bool {
911        domain::match_domain_pattern(domain_str, pattern)
912    }
913
914    /// Normalize a domain for matching: lowercase, strip trailing dots, apply IDNA.
915    ///
916    /// See [`domain::normalize_domain_for_match`] for details.
917    fn normalize_domain_for_match(s: &str) -> Option<std::borrow::Cow<'_, str>> {
918        domain::normalize_domain_for_match(s)
919    }
920
921    /// Maximum regex pattern length to prevent ReDoS via overlength patterns.
922    const MAX_REGEX_LEN: usize = 1024;
923
924    /// Validate a regex pattern for ReDoS safety.
925    ///
926    /// Rejects patterns that are too long (>1024 chars) or contain constructs
927    /// known to cause exponential backtracking:
928    ///
929    /// 1. **Nested quantifiers** like `(a+)+`, `(a*)*`, `(a+)*`, `(a*)+`
930    /// 2. **Overlapping alternation with quantifiers** like `(a|a)+` or `(a|ab)+`
931    ///
932    /// **Known limitations (FIND-R46-007):** This is a heuristic check, not a
933    /// full NFA analysis. It does NOT detect all possible ReDoS patterns:
934    /// - Alternation with overlapping character classes (e.g., `([a-z]|[a-m])+`)
935    /// - Backreferences with quantifiers
936    /// - Lookahead/lookbehind with quantifiers
937    /// - Possessive quantifiers (these are actually safe but not recognized)
938    ///
939    /// The `regex` crate uses a DFA/NFA hybrid that is immune to most ReDoS,
940    /// but pattern compilation itself can be expensive for very complex patterns,
941    /// hence the length limit.
942    fn validate_regex_safety(pattern: &str) -> Result<(), String> {
943        if pattern.len() > Self::MAX_REGEX_LEN {
944            return Err(format!(
945                "Regex pattern exceeds maximum length of {} chars ({} chars)",
946                Self::MAX_REGEX_LEN,
947                pattern.len()
948            ));
949        }
950
951        // Detect nested quantifiers: a quantifier applied to a group that
952        // itself contains a quantifier. Simplified check for common patterns.
953        let quantifiers = ['+', '*'];
954        let mut paren_depth = 0i32;
955        let mut has_inner_quantifier = false;
956        let chars: Vec<char> = pattern.chars().collect();
957        // SECURITY (R8-5): Use a skip_next flag to correctly handle escape
958        // sequences. The previous approach checked chars[i-1] == '\\' but
959        // failed for double-escapes like `\\\\(` (literal backslash + open paren).
960        let mut skip_next = false;
961
962        // Track alternation branches within groups to detect overlapping alternation.
963        // SECURITY (FIND-R46-007): Detect `(branch1|branch2)+` where branches share
964        // a common prefix, which can cause backtracking even without nested quantifiers.
965        let mut group_has_alternation = false;
966
967        for i in 0..chars.len() {
968            if skip_next {
969                skip_next = false;
970                continue;
971            }
972            match chars[i] {
973                '\\' => {
974                    // Skip the NEXT character (the escaped one)
975                    skip_next = true;
976                    continue;
977                }
978                '(' => {
979                    paren_depth += 1;
980                    has_inner_quantifier = false;
981                    group_has_alternation = false;
982                }
983                ')' => {
984                    paren_depth -= 1;
985                    // SECURITY (FIND-R58-ENG-002): Reject unbalanced closing parens.
986                    // Negative paren_depth disables alternation/inner-quantifier
987                    // tracking, allowing ReDoS patterns to bypass the safety check.
988                    if paren_depth < 0 {
989                        return Err(format!(
990                            "Invalid regex pattern — unbalanced parentheses: '{}'",
991                            &pattern[..pattern.len().min(100)]
992                        ));
993                    }
994                    // Check if the next char is a quantifier
995                    if i + 1 < chars.len() && quantifiers.contains(&chars[i + 1]) {
996                        if has_inner_quantifier {
997                            return Err(format!(
998                                "Regex pattern contains nested quantifiers (potential ReDoS): '{}'",
999                                &pattern[..pattern.len().min(100)]
1000                            ));
1001                        }
1002                        // FIND-R46-007: Alternation with a quantifier on the group
1003                        // can cause backtracking if branches overlap.
1004                        if group_has_alternation {
1005                            return Err(format!(
1006                                "Regex pattern contains alternation with outer quantifier (potential ReDoS): '{}'",
1007                                &pattern[..pattern.len().min(100)]
1008                            ));
1009                        }
1010                    }
1011                }
1012                '|' if paren_depth > 0 => {
1013                    group_has_alternation = true;
1014                }
1015                c if quantifiers.contains(&c) && paren_depth > 0 => {
1016                    has_inner_quantifier = true;
1017                }
1018                _ => {}
1019            }
1020        }
1021
1022        // SECURITY (FIND-R58-ENG-004): Reject patterns with unclosed parentheses.
1023        if paren_depth != 0 {
1024            return Err(format!(
1025                "Invalid regex pattern — unbalanced parentheses ({} unclosed): '{}'",
1026                paren_depth,
1027                &pattern[..pattern.len().min(100)]
1028            ));
1029        }
1030
1031        Ok(())
1032    }
1033
1034    /// Compile a regex pattern and test whether it matches the input.
1035    ///
1036    /// Legacy path: compiles the pattern on each call (no caching).
1037    /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1038    ///
1039    /// Validates the pattern for ReDoS safety before compilation (H2).
1040    fn regex_is_match(
1041        &self,
1042        pattern: &str,
1043        input: &str,
1044        policy_id: &str,
1045    ) -> Result<bool, EngineError> {
1046        Self::validate_regex_safety(pattern).map_err(|reason| EngineError::InvalidCondition {
1047            policy_id: policy_id.to_string(),
1048            reason,
1049        })?;
1050        let re = Regex::new(pattern).map_err(|e| EngineError::InvalidCondition {
1051            policy_id: policy_id.to_string(),
1052            reason: format!("Invalid regex pattern '{pattern}': {e}"),
1053        })?;
1054        Ok(re.is_match(input))
1055    }
1056
1057    /// Compile a glob pattern and test whether it matches the input.
1058    ///
1059    /// Legacy path: compiles the pattern on each call (no caching).
1060    /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1061    fn glob_is_match(
1062        &self,
1063        pattern: &str,
1064        input: &str,
1065        policy_id: &str,
1066    ) -> Result<bool, EngineError> {
1067        // SECURITY: On poisoned read lock, treat as cache miss rather than
1068        // accessing potentially corrupted data. The pattern will be compiled fresh.
1069        {
1070            let cache_result = self.glob_matcher_cache.read();
1071            match cache_result {
1072                Ok(cache) => {
1073                    if let Some(matcher) = cache.get(pattern) {
1074                        return Ok(matcher.is_match(input));
1075                    }
1076                }
1077                Err(e) => {
1078                    tracing::warn!(
1079                        "glob_matcher_cache read lock poisoned, treating as cache miss: {}",
1080                        e
1081                    );
1082                    // Fall through to compile the pattern fresh
1083                }
1084            }
1085        }
1086
1087        let matcher = Glob::new(pattern)
1088            .map_err(|e| EngineError::InvalidCondition {
1089                policy_id: policy_id.to_string(),
1090                reason: format!("Invalid glob pattern '{pattern}': {e}"),
1091            })?
1092            .compile_matcher();
1093        let is_match = matcher.is_match(input);
1094
1095        // SECURITY: On poisoned write lock, skip cache insertion rather than
1096        // writing into potentially corrupted state. The result is still correct,
1097        // just not cached.
1098        let cache_write = self.glob_matcher_cache.write();
1099        let mut cache = match cache_write {
1100            Ok(guard) => guard,
1101            Err(e) => {
1102                tracing::warn!(
1103                    "glob_matcher_cache write lock poisoned, skipping cache insert: {}",
1104                    e
1105                );
1106                return Ok(is_match);
1107            }
1108        };
1109        // FIND-R58-ENG-011: Full cache.clear() can cause a thundering herd of
1110        // recompilation on the legacy (non-precompiled) path. For production,
1111        // use with_policies() to pre-compile patterns and avoid this cache entirely.
1112        if cache.len() >= MAX_GLOB_MATCHER_CACHE_ENTRIES {
1113            // SECURITY (P3-ENG-004): Warn on cache eviction so cache thrashing is
1114            // observable in logs. This indicates a policy set with more unique glob
1115            // patterns than MAX_GLOB_MATCHER_CACHE_ENTRIES, which causes repeated
1116            // recompilation and may indicate a misconfiguration or DoS attempt.
1117            tracing::warn!(
1118                capacity = MAX_GLOB_MATCHER_CACHE_ENTRIES,
1119                "glob_matcher_cache capacity exceeded — clearing cache (cache thrashing possible; prefer with_policies() to pre-compile patterns)"
1120            );
1121            cache.clear();
1122        }
1123        cache.insert(pattern.to_string(), matcher);
1124
1125        Ok(is_match)
1126    }
1127
1128    /// Retrieve a parameter value by dot-separated path.
1129    ///
1130    /// Supports both simple keys (`"path"`) and nested paths (`"config.output.path"`).
1131    ///
1132    /// **Resolution order** (Exploit #5 fix): When the path contains dots, the function
1133    /// checks both an exact key match (e.g., `params["config.path"]`) and dot-split
1134    /// traversal (e.g., `params["config"]["path"]`).
1135    ///
1136    /// **Ambiguity handling (fail-closed):** If both interpretations resolve to different
1137    /// values, the function returns `None`. This prevents an attacker from shadowing a
1138    /// nested value with a literal dotted key (or vice versa). The `None` triggers
1139    /// deny behavior through the constraint's `on_missing` handling.
1140    ///
1141    /// When only one interpretation resolves, that value is returned.
1142    /// When both resolve to the same value, that value is returned.
1143    ///
1144    /// IMPROVEMENT_PLAN 4.1: Also supports bracket notation for array access:
1145    /// - `items[0]` — access first element of array "items"
1146    /// - `config.items[0].path` — traverse nested path with array access
1147    /// - `matrix[0][1]` — multi-dimensional array access
1148    pub fn get_param_by_path<'a>(
1149        params: &'a serde_json::Value,
1150        path: &str,
1151    ) -> Option<&'a serde_json::Value> {
1152        let exact_match = params.get(path);
1153
1154        // For non-dotted paths without brackets, exact match is the only interpretation
1155        if !path.contains('.') && !path.contains('[') {
1156            return exact_match;
1157        }
1158
1159        // Try dot-split traversal for nested objects with bracket notation support
1160        let traversal_match = Self::traverse_path(params, path);
1161
1162        match (exact_match, traversal_match) {
1163            // Both exist but differ: ambiguous — fail-closed (return None)
1164            (Some(exact), Some(traversal)) if exact != traversal => None,
1165            // Both exist and are equal: no ambiguity
1166            (Some(exact), Some(_)) => Some(exact),
1167            // Only one interpretation resolves
1168            (Some(exact), None) => Some(exact),
1169            (None, Some(traversal)) => Some(traversal),
1170            (None, None) => None,
1171        }
1172    }
1173
1174    /// Traverse a JSON value using a path with dot notation and bracket notation.
1175    ///
1176    /// Supports:
1177    /// - `foo.bar` — nested object access
1178    /// - `items[0]` — array index access
1179    /// - `foo.items[0].bar` — mixed traversal
1180    /// - `matrix[0][1]` — consecutive array access
1181    fn traverse_path<'a>(
1182        params: &'a serde_json::Value,
1183        path: &str,
1184    ) -> Option<&'a serde_json::Value> {
1185        let mut current = params;
1186
1187        // Split by dots first, then handle bracket notation within each segment
1188        for segment in path.split('.') {
1189            if segment.is_empty() {
1190                continue;
1191            }
1192
1193            // Check for bracket notation: field[index] or just [index]
1194            if let Some(bracket_pos) = segment.find('[') {
1195                // Get the field name before the bracket (may be empty for [0][1] style)
1196                let field_name = &segment[..bracket_pos];
1197
1198                // If there's a field name, traverse into it first
1199                if !field_name.is_empty() {
1200                    current = current.get(field_name)?;
1201                }
1202
1203                // Parse all bracket indices in this segment: [0][1][2]...
1204                let mut rest = &segment[bracket_pos..];
1205                while rest.starts_with('[') {
1206                    let close_pos = rest.find(']')?;
1207                    let index_str = &rest[1..close_pos];
1208                    let index: usize = index_str.parse().ok()?;
1209
1210                    // Access array element
1211                    current = current.get(index)?;
1212
1213                    // Move past this bracket pair
1214                    rest = &rest[close_pos + 1..];
1215                }
1216
1217                // If there's remaining content after brackets, it's malformed
1218                if !rest.is_empty() {
1219                    return None;
1220                }
1221            } else {
1222                // Simple field access
1223                current = current.get(segment)?;
1224            }
1225        }
1226
1227        Some(current)
1228    }
1229
1230    /// Maximum number of string values to collect during recursive parameter scanning.
1231    /// Prevents DoS from parameters with thousands of nested string values.
1232    const MAX_SCAN_VALUES: usize = 500;
1233
1234    /// Maximum nesting depth for recursive parameter scanning.
1235    ///
1236    /// 32 levels is sufficient for any reasonable MCP tool parameter structure
1237    /// (typical JSON has 3-5 levels; 32 provides ample headroom). Objects or
1238    /// arrays nested beyond this depth are silently skipped — their string
1239    /// values will not be collected for constraint evaluation or DLP scanning.
1240    /// This prevents stack/memory exhaustion from attacker-crafted deeply nested JSON.
1241    const MAX_JSON_DEPTH: usize = 32;
1242
1243    /// Maximum work stack size for iterative JSON traversal.
1244    ///
1245    /// SECURITY (FIND-R168-003): Caps the iterative traversal stack to prevent
1246    /// transient memory spikes from flat JSON objects/arrays with many children.
1247    /// Without this, a 1MB JSON with 100K keys at depth 0 would push all 100K
1248    /// items before the depth/results checks trigger.
1249    const MAX_STACK_SIZE: usize = 10_000;
1250
1251    /// Recursively collect all string values from a JSON structure.
1252    ///
1253    /// Returns a list of `(path, value)` pairs where `path` is a dot-separated
1254    /// description of where the value was found (e.g., `"options.target"`), and
1255    /// a boolean indicating whether results were truncated at [`MAX_SCAN_VALUES`].
1256    /// Uses an iterative approach to avoid stack overflow on deep JSON.
1257    ///
1258    /// Bounded by [`MAX_SCAN_VALUES`] total values and [`MAX_JSON_DEPTH`] nesting depth.
1259    ///
1260    /// SECURITY (R234-ENG-4): Returns truncation flag so callers can fail-closed
1261    /// when the parameter space exceeds scan capacity.
1262    fn collect_all_string_values(params: &serde_json::Value) -> (Vec<(String, &str)>, bool) {
1263        // Pre-allocate for typical parameter sizes; bounded by MAX_SCAN_VALUES
1264        let mut results = Vec::with_capacity(16);
1265        let mut truncated = false;
1266        // Stack: (value, current_path, depth)
1267        let mut stack: Vec<(&serde_json::Value, String, usize)> = vec![(params, String::new(), 0)];
1268
1269        while let Some((val, path, depth)) = stack.pop() {
1270            if results.len() >= Self::MAX_SCAN_VALUES {
1271                truncated = true;
1272                break;
1273            }
1274            match val {
1275                serde_json::Value::String(s) => {
1276                    if !path.is_empty() {
1277                        results.push((path, s.as_str()));
1278                    }
1279                }
1280                serde_json::Value::Object(obj) => {
1281                    if depth >= Self::MAX_JSON_DEPTH {
1282                        continue;
1283                    }
1284                    for (key, child) in obj {
1285                        // SECURITY (FIND-R168-003): Bound stack inside push loop.
1286                        if stack.len() >= Self::MAX_STACK_SIZE {
1287                            break;
1288                        }
1289                        let child_path = if path.is_empty() {
1290                            key.clone()
1291                        } else {
1292                            let mut p = String::with_capacity(path.len() + 1 + key.len());
1293                            p.push_str(&path);
1294                            p.push('.');
1295                            p.push_str(key);
1296                            p
1297                        };
1298                        stack.push((child, child_path, depth + 1));
1299                    }
1300                }
1301                serde_json::Value::Array(arr) => {
1302                    if depth >= Self::MAX_JSON_DEPTH {
1303                        continue;
1304                    }
1305                    for (i, child) in arr.iter().enumerate() {
1306                        if stack.len() >= Self::MAX_STACK_SIZE {
1307                            break;
1308                        }
1309                        let child_path = if path.is_empty() {
1310                            format!("[{i}]")
1311                        } else {
1312                            format!("{path}[{i}]")
1313                        };
1314                        stack.push((child, child_path, depth + 1));
1315                    }
1316                }
1317                _ => {}
1318            }
1319        }
1320
1321        (results, truncated)
1322    }
1323
1324    /// Convert an `on_match` action string into a Verdict.
1325    fn make_constraint_verdict(on_match: &str, reason: &str) -> Result<Verdict, EngineError> {
1326        match on_match {
1327            "deny" => Ok(Verdict::Deny {
1328                reason: reason.to_string(),
1329            }),
1330            "require_approval" => Ok(Verdict::RequireApproval {
1331                reason: reason.to_string(),
1332            }),
1333            "allow" => Ok(Verdict::Allow),
1334            other => Err(EngineError::EvaluationError(format!(
1335                "Unknown on_match action: '{other}'"
1336            ))),
1337        }
1338    }
1339    /// Returns true if any compiled policy has IP rules configured.
1340    ///
1341    /// Used by proxy layers to skip DNS resolution when no policies require it.
1342    pub fn has_ip_rules(&self) -> bool {
1343        self.compiled_policies
1344            .iter()
1345            .any(|cp| cp.compiled_ip_rules.is_some())
1346    }
1347}
1348
1349#[cfg(test)]
1350#[allow(deprecated)] // evaluate_action_with_context: migration tracked in FIND-CREATIVE-005
1351#[path = "engine_tests.rs"]
1352mod tests;
vellaveto_engine/lib.rs

vellaveto_engine/
lib.rs