vellaveto_engine/lib.rs
1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Policy evaluation engine for the Vellaveto MCP tool firewall.
9//!
10//! Evaluates [`Action`](vellaveto_types::core::Action) requests against
11//! configured [`Policy`](vellaveto_types::core::Policy) rules and returns a
12//! [`Verdict`](vellaveto_types::core::Verdict) (Allow, Deny, or RequireApproval).
13//! Supports glob/regex path matching, domain/IP rules, ABAC attribute constraints,
14//! call-chain validation, decision caching (LRU+TTL), and Wasm policy plugins.
15//!
16//! The engine is synchronous by design — all evaluation completes in <5ms P99.
17
18pub mod abac;
19pub mod acis;
20pub mod adaptive_rate;
21pub mod agent_baseline;
22pub mod behavioral;
23pub mod cache;
24pub mod cascade_graph;
25pub mod cascading;
26pub mod circuit_breaker;
27pub mod collusion;
28mod compiled;
29mod constraint_eval;
30pub mod contagion;
31mod context_check;
32pub mod coverage;
33pub mod cross_session_leak;
34pub mod cumulative_harm;
35pub mod delegation;
36pub mod denial_of_wallet;
37pub mod deputy;
38pub mod exfil_path;
39
40#[cfg(test)]
41mod channel_separation_tests;
42mod domain;
43mod entropy_gate;
44mod error;
45pub mod impact;
46mod ip;
47pub mod least_agency;
48mod legacy;
49pub mod lint;
50mod matcher;
51pub mod nhi_overpermission;
52mod normalize;
53mod path;
54mod policy_compile;
55mod rule_check;
56pub mod sequence;
57mod traced;
58mod verified_capability_context;
59mod verified_capability_delegation_context;
60pub mod verified_constraint_eval;
61mod verified_context_delegation;
62pub mod verified_core;
63mod verified_deputy;
64mod verified_entropy_gate;
65pub mod wasm_plugin;
66
67#[cfg(kani)]
68mod kani_proofs;
69
70pub use compiled::{
71 CompiledConstraint, CompiledContextCondition, CompiledIpRules, CompiledNetworkRules,
72 CompiledPathRules, CompiledPolicy,
73};
74pub use error::{EngineError, PolicyValidationError};
75pub use matcher::{CompiledToolMatcher, PatternMatcher};
76pub use path::DEFAULT_MAX_PATH_DECODE_ITERATIONS;
77
78use vellaveto_types::{
79 Action, ActionSummary, EvaluationContext, EvaluationTrace, Policy, PolicyType, Verdict,
80};
81
82use globset::{Glob, GlobMatcher};
83use regex::Regex;
84use std::collections::HashMap;
85use std::sync::RwLock;
86
/// Upper bound on the number of compiled glob matchers retained in the
/// legacy runtime cache.
const MAX_GLOB_MATCHER_CACHE_ENTRIES: usize = 2_048;
/// Upper bound on the number of domain-normalization results retained in the
/// runtime cache.
///
/// The cache starts empty and evaluation paths do not currently populate it
/// (domain normalization happens inline). The constant is kept as the
/// documented eviction cap for the `domain_norm_cache` field, so that a
/// future population path already has a bound available.
#[allow(dead_code)]
const MAX_DOMAIN_NORM_CACHE_ENTRIES: usize = 4_096;
97
/// The core policy evaluation engine.
///
/// Evaluates [`Action`]s against a set of [`Policy`] rules to produce a [`Verdict`].
///
/// # Security Model
///
/// - **Fail-closed**: An empty policy set produces `Verdict::Deny`.
/// - **Priority ordering**: Higher-priority policies are evaluated first.
/// - **Pattern matching**: Policy IDs use `"tool:function"` convention with wildcard support.
pub struct PolicyEngine {
    /// Strict-validation flag. It is passed to policy compilation in
    /// `with_policies` and returned unchanged by `strict_mode()`.
    strict_mode: bool,
    /// Pre-compiled policies. When this is non-empty, `evaluate_action`
    /// evaluates these and ignores its `policies` argument.
    compiled_policies: Vec<CompiledPolicy>,
    /// Maps exact tool names to sorted indices in `compiled_policies`.
    /// Only policies with an exact tool name pattern are indexed here.
    tool_index: HashMap<String, Vec<usize>>,
    /// Indices of policies that cannot be indexed by tool name
    /// (Universal, prefix, suffix, or Any tool patterns).
    /// Already sorted by position in `compiled_policies` (= priority order).
    always_check: Vec<usize>,
    /// When false (default), time-window context conditions always use wall-clock
    /// time. When true, the engine honors `EvaluationContext.timestamp` from the
    /// caller. **Only enable for deterministic testing** — in production, a client
    /// could supply a fake timestamp to bypass time-window policies.
    trust_context_timestamps: bool,
    /// Maximum percent-decoding iterations in `normalize_path` before
    /// fail-closing to `"/"`. Defaults to [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20).
    max_path_decode_iterations: u32,
    /// Legacy runtime cache for glob matcher compilation.
    ///
    /// This cache is used by `glob_is_match` on the non-precompiled path.
    /// Both constructors in this file pre-size it for 256 entries.
    glob_matcher_cache: RwLock<HashMap<String, GlobMatcher>>,
    /// Runtime cache for domain normalization results.
    ///
    /// Caches both successful normalization (Some) and invalid domains (None)
    /// to avoid repeated IDNA parsing on hot network/domain constraint paths.
    ///
    /// SECURITY (FIND-R46-003): Bounded to [`MAX_DOMAIN_NORM_CACHE_ENTRIES`].
    /// When capacity is exceeded, the cache is cleared to prevent unbounded
    /// memory growth from attacker-controlled domain strings. Currently this
    /// cache is not actively populated — domain normalization is done inline
    /// via [`domain::normalize_domain_for_match`]. The eviction guard exists
    /// as a defense-in-depth measure for future caching additions.
    domain_norm_cache: RwLock<HashMap<String, Option<String>>>,
    /// Optional topology guard for pre-policy tool call filtering.
    /// When set, tool calls are checked against the live topology graph
    /// before policy evaluation. Unknown tools may be denied or trigger
    /// a re-crawl depending on configuration.
    ///
    /// Only available when the `discovery` feature is enabled.
    #[cfg(feature = "discovery")]
    topology_guard: Option<std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>>,
}
150
151impl std::fmt::Debug for PolicyEngine {
152 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153 let mut s = f.debug_struct("PolicyEngine");
154 s.field("strict_mode", &self.strict_mode)
155 .field("compiled_policies_count", &self.compiled_policies.len())
156 .field("indexed_tools", &self.tool_index.len())
157 .field("always_check_count", &self.always_check.len())
158 .field(
159 "max_path_decode_iterations",
160 &self.max_path_decode_iterations,
161 )
162 .field(
163 "glob_matcher_cache_size",
164 &self
165 .glob_matcher_cache
166 .read()
167 .map(|c| c.len())
168 .unwrap_or_default(),
169 )
170 .field(
171 "domain_norm_cache_size",
172 &self
173 .domain_norm_cache
174 .read()
175 .map(|c| c.len())
176 .unwrap_or_default(),
177 );
178 #[cfg(feature = "discovery")]
179 {
180 s.field("topology_guard", &self.topology_guard.is_some());
181 }
182 s.finish()
183 }
184}
185
186impl PolicyEngine {
187 /// Create a new policy engine.
188 ///
189 /// When `strict_mode` is true, the engine applies stricter validation
190 /// on conditions and parameters.
191 pub fn new(strict_mode: bool) -> Self {
192 Self {
193 strict_mode,
194 compiled_policies: Vec::new(),
195 tool_index: HashMap::new(),
196 always_check: Vec::new(),
197 trust_context_timestamps: false,
198 max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
199 glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
200 // IMP-R208-001: Zero initial capacity — cache not actively populated.
201 domain_norm_cache: RwLock::new(HashMap::new()),
202 #[cfg(feature = "discovery")]
203 topology_guard: None,
204 }
205 }
206
/// Returns the engine's `strict_mode` setting.
///
/// This is the stored flag, which `new` and `with_policies` set from their
/// `strict_mode` argument. The method only reads the field.
pub fn strict_mode(&self) -> bool {
    self.strict_mode
}
211
/// Validate a domain pattern used in network_rules.
///
/// Rules per RFC 1035:
/// - Labels (parts between dots) must be 1-63 characters each
/// - Each label must be alphanumeric + hyphen only (no leading/trailing hyphen)
/// - Total domain length max 253 characters
/// - Wildcard `*.` prefix is allowed (only at the beginning)
/// - Empty string is rejected
///
/// See the internal `domain::validate_domain_pattern` function for details.
///
/// # Errors
///
/// Returns the `Err(String)` produced by the internal validator unchanged.
/// This method is a public wrapper and performs no checks of its own.
pub fn validate_domain_pattern(pattern: &str) -> Result<(), String> {
    domain::validate_domain_pattern(pattern)
}
225
226 /// Create a new policy engine with pre-compiled policies.
227 ///
228 /// All regex and glob patterns are compiled at construction time.
229 /// Invalid patterns cause immediate rejection with descriptive errors.
230 /// The compiled policies are sorted by priority (highest first, deny-overrides).
231 pub fn with_policies(
232 strict_mode: bool,
233 policies: &[Policy],
234 ) -> Result<Self, Vec<PolicyValidationError>> {
235 let compiled = Self::compile_policies(policies, strict_mode)?;
236 let (tool_index, always_check) = Self::build_tool_index(&compiled);
237 Ok(Self {
238 strict_mode,
239 compiled_policies: compiled,
240 tool_index,
241 always_check,
242 trust_context_timestamps: false,
243 max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
244 glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
245 // IMP-R208-001: Zero initial capacity — cache not actively populated.
246 domain_norm_cache: RwLock::new(HashMap::new()),
247 #[cfg(feature = "discovery")]
248 topology_guard: None,
249 })
250 }
251
/// Enable trusting `EvaluationContext.timestamp` for time-window checks.
///
/// **WARNING:** Only use for deterministic testing. In production, a client
/// can supply a fake timestamp to bypass time-window policies.
///
/// This setter is compiled only under `cfg(test)`. The constructors in this
/// file initialise the flag to `false`.
#[cfg(test)]
pub fn set_trust_context_timestamps(&mut self, trust: bool) {
    self.trust_context_timestamps = trust;
}
260
/// Set the topology guard for pre-policy tool call filtering.
///
/// When set, `evaluate_action` checks the tool against the topology graph
/// before policy evaluation. Unknown tools produce `Verdict::Deny` with a
/// topology-specific reason, unless the guard returns `Bypassed`.
///
/// Calling this again replaces the previously installed guard. Available
/// only when the `discovery` feature is enabled.
#[cfg(feature = "discovery")]
pub fn set_topology_guard(
    &mut self,
    guard: std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>,
) {
    self.topology_guard = Some(guard);
}
273
/// Consult the topology guard, if one is installed, ahead of policy
/// evaluation.
///
/// Returns `None` when no guard is set, or when the guard reports the tool
/// as `Known` or `Bypassed`; normal policy evaluation then proceeds.
/// Returns `Some(Verdict::Deny { .. })` when the guard reports the tool as
/// `Unknown` or `Ambiguous`.
#[cfg(feature = "discovery")]
fn check_topology(&self, action: &Action) -> Option<Verdict> {
    use vellaveto_discovery::guard::TopologyVerdict;

    let guard = self.topology_guard.as_ref()?;
    let tool_name = &action.tool;

    // Every non-returning arm yields the human-readable denial reason.
    let reason = match guard.check(tool_name) {
        TopologyVerdict::Known { .. } | TopologyVerdict::Bypassed => return None,
        TopologyVerdict::Unknown { suggestion, .. } => match suggestion {
            Some(closest) => format!(
                "Tool '{tool_name}' not found in topology graph (did you mean '{closest}'?)"
            ),
            None => format!("Tool '{tool_name}' not found in topology graph"),
        },
        TopologyVerdict::Ambiguous { matches, .. } => format!(
            "Tool '{}' is ambiguous — matches servers: {}. Use qualified name (server::tool).",
            tool_name,
            matches.join(", ")
        ),
    };
    Some(Verdict::Deny { reason })
}
307
/// Set the maximum percent-decoding iterations for path normalization.
///
/// Paths requiring more iterations fail-closed to `"/"`. The default is
/// [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20). A value of 0 disables
/// iterative decoding entirely (single pass only).
///
/// The value is stored as given; this setter does no validation.
// NOTE(review): the effect of `0` is decided by the path-normalization code
// outside this block — confirm it matches the sentence above.
pub fn set_max_path_decode_iterations(&mut self, max: u32) {
    self.max_path_decode_iterations = max;
}
316
317 /// Build a tool-name index for O(matching) evaluation.
318 fn build_tool_index(compiled: &[CompiledPolicy]) -> (HashMap<String, Vec<usize>>, Vec<usize>) {
319 let mut index: HashMap<String, Vec<usize>> = HashMap::with_capacity(compiled.len());
320 let mut always_check = Vec::with_capacity(compiled.len());
321 for (i, cp) in compiled.iter().enumerate() {
322 match &cp.tool_matcher {
323 CompiledToolMatcher::Universal => always_check.push(i),
324 CompiledToolMatcher::ToolOnly(PatternMatcher::Exact(name)) => {
325 index.entry(name.clone()).or_default().push(i);
326 }
327 CompiledToolMatcher::ToolAndFunction(PatternMatcher::Exact(name), _) => {
328 index.entry(name.clone()).or_default().push(i);
329 }
330 _ => always_check.push(i),
331 }
332 }
333 // SECURITY (FIND-R49-003): Assert sorted invariant in debug builds.
334 // The always_check list must be sorted by index for deterministic evaluation order.
335 // Tool index values must also be sorted per-key for the same reason.
336 debug_assert!(
337 always_check.windows(2).all(|w| w[0] < w[1]),
338 "always_check must be sorted"
339 );
340 debug_assert!(
341 index.values().all(|v| v.windows(2).all(|w| w[0] < w[1])),
342 "tool_index values must be sorted"
343 );
344 (index, always_check)
345 }
346
347 /// Sort policies by priority (highest first), with deny-overrides at equal priority,
348 /// and a stable tertiary tiebreaker by policy ID for deterministic ordering.
349 ///
350 /// Call this once when loading or modifying policies, then pass the sorted
351 /// slice to [`Self::evaluate_action`] to avoid re-sorting on every evaluation.
352 pub fn sort_policies(policies: &mut [Policy]) {
353 policies.sort_by(|a, b| {
354 let pri = b.priority.cmp(&a.priority);
355 if pri != std::cmp::Ordering::Equal {
356 return pri;
357 }
358 let a_deny = matches!(a.policy_type, PolicyType::Deny);
359 let b_deny = matches!(b.policy_type, PolicyType::Deny);
360 let deny_ord = b_deny.cmp(&a_deny);
361 if deny_ord != std::cmp::Ordering::Equal {
362 return deny_ord;
363 }
364 // Tertiary tiebreaker: lexicographic by ID for deterministic ordering
365 a.id.cmp(&b.id)
366 });
367 }
368
369 // VERIFIED [S1]: Deny-by-default — empty policy set produces Deny (MCPPolicyEngine.tla S1)
370 // VERIFIED [S2]: Priority ordering — higher priority wins (MCPPolicyEngine.tla S2)
371 // VERIFIED [S3]: Deny-overrides — Deny beats Allow at same priority (MCPPolicyEngine.tla S3)
372 // VERIFIED [S5]: Errors produce Deny — every Allow verdict has a matching Allow policy (MCPPolicyEngine.tla S5)
373 // VERIFIED [L1]: Progress — every action gets a verdict (MCPPolicyEngine.tla L1)
374 /// Evaluate an action against a set of policies.
375 ///
376 /// For best performance, pass policies that have been pre-sorted with
377 /// [`Self::sort_policies`]. If not pre-sorted, this method will sort a temporary
378 /// copy (which adds O(n log n) overhead per call).
379 ///
380 /// The first matching policy determines the verdict.
381 /// If no policy matches, the default is Deny (fail-closed).
382 #[must_use = "security verdicts must not be discarded"]
383 pub fn evaluate_action(
384 &self,
385 action: &Action,
386 policies: &[Policy],
387 ) -> Result<Verdict, EngineError> {
388 // Topology pre-filter: check if the tool exists in the topology graph.
389 // Unknown/ambiguous tools are denied before policy evaluation.
390 #[cfg(feature = "discovery")]
391 if let Some(deny) = self.check_topology(action) {
392 return Ok(deny);
393 }
394
395 // Fast path: use pre-compiled policies (zero Mutex, zero runtime compilation)
396 if !self.compiled_policies.is_empty() {
397 return self.evaluate_with_compiled(action);
398 }
399
400 // Legacy path: evaluate ad-hoc policies (compiles patterns on the fly)
401 if policies.is_empty() {
402 return Ok(Verdict::Deny {
403 reason: "No policies defined".to_string(),
404 });
405 }
406
407 // Check if already sorted (by priority desc, deny-first at equal priority,
408 // then by ID ascending as a tiebreaker — FIND-R44-057)
409 let is_sorted = policies.windows(2).all(|w| {
410 let pri = w[0].priority.cmp(&w[1].priority);
411 if pri == std::cmp::Ordering::Equal {
412 let a_deny = matches!(w[0].policy_type, PolicyType::Deny);
413 let b_deny = matches!(w[1].policy_type, PolicyType::Deny);
414 if a_deny == b_deny {
415 // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
416 w[0].id.cmp(&w[1].id) != std::cmp::Ordering::Greater
417 } else {
418 b_deny <= a_deny
419 }
420 } else {
421 pri != std::cmp::Ordering::Less
422 }
423 });
424
425 if is_sorted {
426 for policy in policies {
427 if self.matches_action(action, policy) {
428 if let Some(verdict) = self.apply_policy(action, policy)? {
429 return Ok(verdict);
430 }
431 // None: on_no_match="continue", try next policy
432 }
433 }
434 } else {
435 let mut sorted: Vec<&Policy> = policies.iter().collect();
436 sorted.sort_by(|a, b| {
437 let pri = b.priority.cmp(&a.priority);
438 if pri != std::cmp::Ordering::Equal {
439 return pri;
440 }
441 let a_deny = matches!(a.policy_type, PolicyType::Deny);
442 let b_deny = matches!(b.policy_type, PolicyType::Deny);
443 let deny_cmp = b_deny.cmp(&a_deny);
444 if deny_cmp != std::cmp::Ordering::Equal {
445 return deny_cmp;
446 }
447 // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
448 a.id.cmp(&b.id)
449 });
450 for policy in &sorted {
451 if self.matches_action(action, policy) {
452 if let Some(verdict) = self.apply_policy(action, policy)? {
453 return Ok(verdict);
454 }
455 // None: on_no_match="continue", try next policy
456 }
457 }
458 }
459
460 Ok(Verdict::Deny {
461 reason: "No matching policy".to_string(),
462 })
463 }
464
465 /// Evaluate an action with optional session context.
466 ///
467 /// This is the context-aware counterpart to [`Self::evaluate_action`].
468 /// When `context` is `Some`, context conditions (time windows, call limits,
469 /// agent identity, action history) are evaluated. When `None`, behaves
470 /// identically to `evaluate_action`.
471 ///
472 /// # WARNING: `policies` parameter ignored when compiled policies exist
473 ///
474 /// When the engine was constructed with [`Self::with_policies`] (or any
475 /// builder that populates `compiled_policies`), the `policies` parameter
476 /// is **completely ignored**. The engine uses its pre-compiled policy set
477 /// instead.
478 #[deprecated(
479 since = "4.0.1",
480 note = "policies parameter is silently ignored when compiled policies exist. \
481 Use evaluate_action() for compiled engines or build a new engine \
482 with with_policies() for dynamic policy sets."
483 )]
484 #[must_use = "security verdicts must not be discarded"]
485 pub fn evaluate_action_with_context(
486 &self,
487 action: &Action,
488 policies: &[Policy],
489 context: Option<&EvaluationContext>,
490 ) -> Result<Verdict, EngineError> {
491 #[cfg(feature = "discovery")]
492 if let Some(deny) = self.check_topology(action) {
493 return Ok(deny);
494 }
495 if let Some(ctx) = context {
496 if let Err(reason) = ctx.validate() {
497 return Ok(Verdict::Deny { reason });
498 }
499 }
500 if context.is_none() {
501 return self.evaluate_action(action, policies);
502 }
503 if !self.compiled_policies.is_empty() {
504 return self.evaluate_with_compiled_ctx(action, context);
505 }
506 if let Some(ctx) = context {
507 if ctx.has_any_meaningful_fields() {
508 return Ok(Verdict::Deny {
509 reason: "Policy engine has no compiled policies; \
510 context conditions cannot be evaluated (fail-closed)"
511 .to_string(),
512 });
513 }
514 }
515 self.evaluate_action(action, policies)
516 }
517
518 /// Evaluate an action with optional session context, returning only the verdict.
519 ///
520 /// This is the context-aware counterpart to [`Self::evaluate_action`].
521 /// When `context` is `Some`, context conditions (time windows, call limits,
522 /// agent identity, action history) are evaluated. When `None`, behaves
523 /// identically to `evaluate_action`.
524 ///
525 /// For the full decision trace, use [`Self::evaluate_action_traced_with_context`].
526 #[must_use = "security verdicts must not be discarded"]
527 pub fn evaluate_with_context(
528 &self,
529 action: &Action,
530 context: Option<&EvaluationContext>,
531 ) -> Result<Verdict, EngineError> {
532 self.evaluate_action_traced_with_context(action, context)
533 .map(|(verdict, _trace)| verdict)
534 }
535
536 /// Evaluate an action with full decision trace and optional session context.
537 #[must_use = "security verdicts must not be discarded"]
538 pub fn evaluate_action_traced_with_context(
539 &self,
540 action: &Action,
541 context: Option<&EvaluationContext>,
542 ) -> Result<(Verdict, EvaluationTrace), EngineError> {
543 // Topology pre-filter: check if the tool exists in the topology graph.
544 #[cfg(feature = "discovery")]
545 if let Some(deny) = self.check_topology(action) {
546 let param_keys: Vec<String> = action
547 .parameters
548 .as_object()
549 .map(|o| o.keys().cloned().collect::<Vec<String>>())
550 .unwrap_or_default();
551 let trace = EvaluationTrace {
552 action_summary: ActionSummary {
553 tool: action.tool.clone(),
554 function: action.function.clone(),
555 param_count: param_keys.len(),
556 param_keys,
557 },
558 policies_checked: 0,
559 policies_matched: 0,
560 matches: vec![],
561 verdict: deny.clone(),
562 duration_us: 0,
563 };
564 return Ok((deny, trace));
565 }
566
567 // SECURITY (FIND-R50-063): Validate context bounds before evaluation.
568 if let Some(ctx) = context {
569 if let Err(reason) = ctx.validate() {
570 let deny = Verdict::Deny {
571 reason: reason.clone(),
572 };
573 let param_keys: Vec<String> = action
574 .parameters
575 .as_object()
576 .map(|o| o.keys().cloned().collect::<Vec<String>>())
577 .unwrap_or_default();
578 let trace = EvaluationTrace {
579 action_summary: ActionSummary {
580 tool: action.tool.clone(),
581 function: action.function.clone(),
582 param_count: param_keys.len(),
583 param_keys,
584 },
585 policies_checked: 0,
586 policies_matched: 0,
587 matches: vec![],
588 verdict: deny.clone(),
589 duration_us: 0,
590 };
591 return Ok((deny, trace));
592 }
593 }
594 if context.is_none() {
595 return self.evaluate_action_traced(action);
596 }
597 // Traced context-aware path
598 self.evaluate_action_traced_ctx(action, context)
599 }
600
601 // ═══════════════════════════════════════════════════
602 // COMPILED EVALUATION PATH (zero Mutex, zero runtime compilation)
603 // ═══════════════════════════════════════════════════
604
605 /// Evaluate an action using pre-compiled policies. Zero Mutex acquisitions.
606 /// Compiled policies are already sorted at compile time.
607 ///
608 /// Uses the tool-name index when available: only checks policies whose tool
609 /// pattern could match `action.tool`, plus `always_check` (wildcard/prefix/suffix).
610 /// Falls back to linear scan when no index has been built.
611 fn evaluate_with_compiled(&self, action: &Action) -> Result<Verdict, EngineError> {
612 // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
613 // the full pipeline (NFKC + lowercase + homoglyph) before policy matching.
614 // This prevents fullwidth Unicode, circled letters (Ⓐ), and mathematical
615 // variants from bypassing exact-match Deny policies. Patterns are also
616 // normalized via normalize_full at compile time for consistency.
617 let norm_tool = crate::normalize::normalize_full(&action.tool);
618 let norm_func = crate::normalize::normalize_full(&action.function);
619
620 // If index was built, use it for O(matching) instead of O(all)
621 if !self.tool_index.is_empty() || !self.always_check.is_empty() {
622 let tool_specific = self.tool_index.get(&norm_tool);
623 let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
624 let always_slice = &self.always_check;
625
626 // Merge two sorted index slices, iterating in priority order.
627 // SECURITY (R26-ENG-1): When both slices reference the same policy index,
628 // increment BOTH pointers to avoid evaluating the policy twice.
629 let mut ti = 0;
630 let mut ai = 0;
631 loop {
632 let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
633 (Some(&t), Some(&a)) => {
634 if t < a {
635 ti += 1;
636 t
637 } else if t > a {
638 ai += 1;
639 a
640 } else {
641 // t == a: same policy in both slices, skip duplicate
642 ti += 1;
643 ai += 1;
644 t
645 }
646 }
647 (Some(&t), None) => {
648 ti += 1;
649 t
650 }
651 (None, Some(&a)) => {
652 ai += 1;
653 a
654 }
655 (None, None) => break,
656 };
657
658 let cp = &self.compiled_policies[next_idx];
659 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
660 if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
661 return Ok(verdict);
662 }
663 // None: on_no_match="continue", try next policy
664 }
665 }
666 } else {
667 // No index: linear scan (legacy compiled path)
668 for cp in &self.compiled_policies {
669 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
670 if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
671 return Ok(verdict);
672 }
673 // None: on_no_match="continue", try next policy
674 }
675 }
676 }
677
678 Ok(Verdict::Deny {
679 reason: "No matching policy".to_string(),
680 })
681 }
682
683 /// Evaluate with compiled policies and session context.
684 fn evaluate_with_compiled_ctx(
685 &self,
686 action: &Action,
687 context: Option<&EvaluationContext>,
688 ) -> Result<Verdict, EngineError> {
689 // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
690 // the full pipeline (same as evaluate_with_compiled).
691 let norm_tool = crate::normalize::normalize_full(&action.tool);
692 let norm_func = crate::normalize::normalize_full(&action.function);
693
694 if !self.tool_index.is_empty() || !self.always_check.is_empty() {
695 let tool_specific = self.tool_index.get(&norm_tool);
696 let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
697 let always_slice = &self.always_check;
698
699 // SECURITY (R26-ENG-1): Deduplicate merge — see evaluate_compiled().
700 let mut ti = 0;
701 let mut ai = 0;
702 loop {
703 let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
704 (Some(&t), Some(&a)) => {
705 if t < a {
706 ti += 1;
707 t
708 } else if t > a {
709 ai += 1;
710 a
711 } else {
712 ti += 1;
713 ai += 1;
714 t
715 }
716 }
717 (Some(&t), None) => {
718 ti += 1;
719 t
720 }
721 (None, Some(&a)) => {
722 ai += 1;
723 a
724 }
725 (None, None) => break,
726 };
727
728 let cp = &self.compiled_policies[next_idx];
729 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
730 if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
731 return Ok(verdict);
732 }
733 }
734 }
735 } else {
736 for cp in &self.compiled_policies {
737 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
738 if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
739 return Ok(verdict);
740 }
741 }
742 }
743 }
744
745 Ok(Verdict::Deny {
746 reason: "No matching policy".to_string(),
747 })
748 }
749
/// Apply a matched compiled policy to produce a verdict (no context).
///
/// Equivalent to calling `apply_compiled_policy_ctx` with `None` as the
/// context.
///
/// Returns `None` when a Conditional policy with `on_no_match: "continue"`
/// has no constraint that fires, which signals the evaluation loop to try
/// the next policy.
fn apply_compiled_policy(
    &self,
    action: &Action,
    cp: &CompiledPolicy,
) -> Result<Option<Verdict>, EngineError> {
    self.apply_compiled_policy_ctx(action, cp, None)
}
760
761 /// Apply a matched compiled policy with optional context.
762 fn apply_compiled_policy_ctx(
763 &self,
764 action: &Action,
765 cp: &CompiledPolicy,
766 context: Option<&EvaluationContext>,
767 ) -> Result<Option<Verdict>, EngineError> {
768 // Check path rules before policy type dispatch.
769 // Blocked paths → deny immediately regardless of policy type.
770 if let Some(denial) = self.check_path_rules(action, cp) {
771 Self::debug_assert_verified_deny(cp, true, false);
772 return Ok(Some(denial));
773 }
774 // Check network rules before policy type dispatch.
775 if let Some(denial) = self.check_network_rules(action, cp) {
776 Self::debug_assert_verified_deny(cp, true, false);
777 return Ok(Some(denial));
778 }
779 // Check IP rules (DNS rebinding protection) after network rules.
780 if let Some(denial) = self.check_ip_rules(action, cp) {
781 Self::debug_assert_verified_deny(cp, true, false);
782 return Ok(Some(denial));
783 }
784 // Check context conditions (session-level) before policy type dispatch.
785 // SECURITY: If a policy declares context conditions but no context is
786 // provided, deny the action (fail-closed). Skipping would let callers
787 // bypass time-window / max-calls / agent-id restrictions by omitting context.
788 if !cp.context_conditions.is_empty() {
789 match context {
790 Some(ctx) => {
791 // SECURITY (R231-ENG-3): Normalize tool name before passing to
792 // context conditions, consistent with policy matching which uses
793 // normalize_full(). Prevents future context conditions from
794 // receiving raw attacker-controlled tool names.
795 let norm_tool = crate::normalize::normalize_full(&action.tool);
796 if let Some(denial) = self.check_context_conditions(ctx, cp, &norm_tool) {
797 Self::debug_assert_verified_deny(cp, false, true);
798 return Ok(Some(denial));
799 }
800 }
801 None => {
802 Self::debug_assert_verified_deny(cp, false, true);
803 return Ok(Some(Verdict::Deny {
804 reason: format!(
805 "Policy '{}' requires evaluation context (has {} context condition(s)) but none was provided",
806 cp.policy.name,
807 cp.context_conditions.len()
808 ),
809 }));
810 }
811 }
812 }
813
814 match &cp.policy.policy_type {
815 PolicyType::Allow => {
816 Self::debug_assert_verified_allow(cp);
817 Ok(Some(Verdict::Allow))
818 }
819 PolicyType::Deny => {
820 Self::debug_assert_verified_policy_deny(cp);
821 Ok(Some(Verdict::Deny {
822 reason: cp.deny_reason.clone(),
823 }))
824 }
825 PolicyType::Conditional { .. } => self.evaluate_compiled_conditions(action, cp),
826 // Handle future variants - fail closed (deny)
827 _ => {
828 // SECURITY (R239-XCUT-5): Genericize — policy name in debug only.
829 tracing::debug!(policy = %cp.policy.name, "Request denied (unknown policy type)");
830 Ok(Some(Verdict::Deny {
831 reason: "Request denied (unknown policy type)".to_string(),
832 }))
833 }
834 }
835 }
836
837 /// Debug-assert: verified core confirms rule/context override produces Deny.
838 #[inline]
839 fn debug_assert_verified_deny(cp: &CompiledPolicy, rule_override: bool, ctx_deny: bool) {
840 debug_assert!({
841 let rm = verified_core::ResolvedMatch {
842 matched: true,
843 is_deny: matches!(cp.policy.policy_type, PolicyType::Deny),
844 is_conditional: matches!(cp.policy.policy_type, PolicyType::Conditional { .. }),
845 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
846 rule_override_deny: rule_override,
847 context_deny: ctx_deny,
848 require_approval: false,
849 condition_fired: false,
850 condition_verdict: verified_core::VerdictKind::Deny,
851 on_no_match_continue: false,
852 all_constraints_skipped: false,
853 };
854 verified_core::compute_single_verdict(&rm)
855 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
856 });
857 }
858
859 /// Debug-assert: verified core confirms Allow policy produces Allow.
860 #[inline]
861 fn debug_assert_verified_allow(cp: &CompiledPolicy) {
862 debug_assert!({
863 let rm = verified_core::ResolvedMatch {
864 matched: true,
865 is_deny: false,
866 is_conditional: false,
867 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
868 rule_override_deny: false,
869 context_deny: false,
870 require_approval: false,
871 condition_fired: false,
872 condition_verdict: verified_core::VerdictKind::Allow,
873 on_no_match_continue: false,
874 all_constraints_skipped: false,
875 };
876 verified_core::compute_single_verdict(&rm)
877 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Allow)
878 });
879 }
880
881 /// Debug-assert: verified core confirms Deny policy produces Deny.
882 #[inline]
883 fn debug_assert_verified_policy_deny(cp: &CompiledPolicy) {
884 debug_assert!({
885 let rm = verified_core::ResolvedMatch {
886 matched: true,
887 is_deny: true,
888 is_conditional: false,
889 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
890 rule_override_deny: false,
891 context_deny: false,
892 require_approval: false,
893 condition_fired: false,
894 condition_verdict: verified_core::VerdictKind::Deny,
895 on_no_match_continue: false,
896 all_constraints_skipped: false,
897 };
898 verified_core::compute_single_verdict(&rm)
899 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
900 });
901 }
902
/// Normalize a file path: resolve `..`, `.`, reject null bytes, ensure deterministic form.
///
/// Handles percent-encoding, null bytes, and path traversal attempts.
///
/// This is a thin public wrapper; the input is passed unchanged to
/// `path::normalize_path`, which does the actual work.
///
/// # Errors
///
/// Propagates whatever [`EngineError`] `path::normalize_path` returns.
pub fn normalize_path(raw: &str) -> Result<String, EngineError> {
    path::normalize_path(raw)
}
909
/// Normalize a file path with a configurable percent-decoding iteration limit.
///
/// Use this variant when you need to control the maximum decode iterations
/// to prevent DoS from deeply nested percent-encoding.
///
/// This is a thin public wrapper; both arguments are passed unchanged to
/// `path::normalize_path_bounded`.
///
/// # Errors
///
/// Propagates whatever [`EngineError`] `path::normalize_path_bounded` returns.
pub fn normalize_path_bounded(raw: &str, max_iterations: u32) -> Result<String, EngineError> {
    path::normalize_path_bounded(raw, max_iterations)
}
917
/// Extract the domain from a URL string.
///
/// Returns the host portion of the URL, or the original string if parsing fails.
///
/// This is a thin public wrapper around `domain::extract_domain`.
pub fn extract_domain(url: &str) -> String {
    domain::extract_domain(url)
}
924
/// Match a domain against a pattern like `*.example.com` or `example.com`.
///
/// Supports wildcard patterns with `*.` prefix for subdomain matching.
///
/// This is a thin public wrapper around `domain::match_domain_pattern`;
/// `domain_str` is the candidate domain and `pattern` the rule to test it against.
pub fn match_domain_pattern(domain_str: &str, pattern: &str) -> bool {
    domain::match_domain_pattern(domain_str, pattern)
}
931
/// Normalize a domain for matching: lowercase, strip trailing dots, apply IDNA.
///
/// Private forwarding helper: the result of
/// [`domain::normalize_domain_for_match`] is returned as-is, including `None`.
/// See that function for details.
fn normalize_domain_for_match(s: &str) -> Option<std::borrow::Cow<'_, str>> {
    domain::normalize_domain_for_match(s)
}
938
/// Maximum regex pattern length to prevent ReDoS via overlength patterns.
///
/// `validate_regex_safety` compares this against `pattern.len()`, so the
/// limit is measured in UTF-8 bytes rather than in characters.
const MAX_REGEX_LEN: usize = 1024;
941
942 /// Validate a regex pattern for ReDoS safety.
943 ///
944 /// Rejects patterns that are too long (>1024 chars) or contain constructs
945 /// known to cause exponential backtracking:
946 ///
947 /// 1. **Nested quantifiers** like `(a+)+`, `(a*)*`, `(a+)*`, `(a*)+`
948 /// 2. **Overlapping alternation with quantifiers** like `(a|a)+` or `(a|ab)+`
949 ///
950 /// **Known limitations (FIND-R46-007):** This is a heuristic check, not a
951 /// full NFA analysis. It does NOT detect all possible ReDoS patterns:
952 /// - Alternation with overlapping character classes (e.g., `([a-z]|[a-m])+`)
953 /// - Backreferences with quantifiers
954 /// - Lookahead/lookbehind with quantifiers
955 /// - Possessive quantifiers (these are actually safe but not recognized)
956 ///
957 /// The `regex` crate uses a DFA/NFA hybrid that is immune to most ReDoS,
958 /// but pattern compilation itself can be expensive for very complex patterns,
959 /// hence the length limit.
960 fn validate_regex_safety(pattern: &str) -> Result<(), String> {
961 if pattern.len() > Self::MAX_REGEX_LEN {
962 return Err(format!(
963 "Regex pattern exceeds maximum length of {} chars ({} chars)",
964 Self::MAX_REGEX_LEN,
965 pattern.len()
966 ));
967 }
968
969 // Detect nested quantifiers: a quantifier applied to a group that
970 // itself contains a quantifier. Simplified check for common patterns.
971 let quantifiers = ['+', '*'];
972 let mut paren_depth = 0i32;
973 let mut has_inner_quantifier = false;
974 let chars: Vec<char> = pattern.chars().collect();
975 // SECURITY (R8-5): Use a skip_next flag to correctly handle escape
976 // sequences. The previous approach checked chars[i-1] == '\\' but
977 // failed for double-escapes like `\\\\(` (literal backslash + open paren).
978 let mut skip_next = false;
979
980 // Track alternation branches within groups to detect overlapping alternation.
981 // SECURITY (FIND-R46-007): Detect `(branch1|branch2)+` where branches share
982 // a common prefix, which can cause backtracking even without nested quantifiers.
983 let mut group_has_alternation = false;
984
985 for i in 0..chars.len() {
986 if skip_next {
987 skip_next = false;
988 continue;
989 }
990 match chars[i] {
991 '\\' => {
992 // Skip the NEXT character (the escaped one)
993 skip_next = true;
994 continue;
995 }
996 '(' => {
997 paren_depth += 1;
998 has_inner_quantifier = false;
999 group_has_alternation = false;
1000 }
1001 ')' => {
1002 paren_depth -= 1;
1003 // SECURITY (FIND-R58-ENG-002): Reject unbalanced closing parens.
1004 // Negative paren_depth disables alternation/inner-quantifier
1005 // tracking, allowing ReDoS patterns to bypass the safety check.
1006 if paren_depth < 0 {
1007 return Err(format!(
1008 "Invalid regex pattern — unbalanced parentheses: '{}'",
1009 &pattern[..pattern.len().min(100)]
1010 ));
1011 }
1012 // Check if the next char is a quantifier
1013 if i + 1 < chars.len() && quantifiers.contains(&chars[i + 1]) {
1014 if has_inner_quantifier {
1015 return Err(format!(
1016 "Regex pattern contains nested quantifiers (potential ReDoS): '{}'",
1017 &pattern[..pattern.len().min(100)]
1018 ));
1019 }
1020 // FIND-R46-007: Alternation with a quantifier on the group
1021 // can cause backtracking if branches overlap.
1022 if group_has_alternation {
1023 return Err(format!(
1024 "Regex pattern contains alternation with outer quantifier (potential ReDoS): '{}'",
1025 &pattern[..pattern.len().min(100)]
1026 ));
1027 }
1028 }
1029 }
1030 '|' if paren_depth > 0 => {
1031 group_has_alternation = true;
1032 }
1033 c if quantifiers.contains(&c) && paren_depth > 0 => {
1034 has_inner_quantifier = true;
1035 }
1036 _ => {}
1037 }
1038 }
1039
1040 // SECURITY (FIND-R58-ENG-004): Reject patterns with unclosed parentheses.
1041 if paren_depth != 0 {
1042 return Err(format!(
1043 "Invalid regex pattern — unbalanced parentheses ({} unclosed): '{}'",
1044 paren_depth,
1045 &pattern[..pattern.len().min(100)]
1046 ));
1047 }
1048
1049 Ok(())
1050 }
1051
1052 /// Compile a regex pattern and test whether it matches the input.
1053 ///
1054 /// Legacy path: compiles the pattern on each call (no caching).
1055 /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1056 ///
1057 /// Validates the pattern for ReDoS safety before compilation (H2).
1058 fn regex_is_match(
1059 &self,
1060 pattern: &str,
1061 input: &str,
1062 policy_id: &str,
1063 ) -> Result<bool, EngineError> {
1064 Self::validate_regex_safety(pattern).map_err(|reason| EngineError::InvalidCondition {
1065 policy_id: policy_id.to_string(),
1066 reason,
1067 })?;
1068 let re = Regex::new(pattern).map_err(|e| EngineError::InvalidCondition {
1069 policy_id: policy_id.to_string(),
1070 reason: format!("Invalid regex pattern '{pattern}': {e}"),
1071 })?;
1072 Ok(re.is_match(input))
1073 }
1074
1075 /// Compile a glob pattern and test whether it matches the input.
1076 ///
1077 /// Legacy path: compiles the pattern on each call (no caching).
1078 /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1079 fn glob_is_match(
1080 &self,
1081 pattern: &str,
1082 input: &str,
1083 policy_id: &str,
1084 ) -> Result<bool, EngineError> {
1085 // SECURITY: On poisoned read lock, treat as cache miss rather than
1086 // accessing potentially corrupted data. The pattern will be compiled fresh.
1087 {
1088 let cache_result = self.glob_matcher_cache.read();
1089 match cache_result {
1090 Ok(cache) => {
1091 if let Some(matcher) = cache.get(pattern) {
1092 return Ok(matcher.is_match(input));
1093 }
1094 }
1095 Err(e) => {
1096 tracing::warn!(
1097 "glob_matcher_cache read lock poisoned, treating as cache miss: {}",
1098 e
1099 );
1100 // Fall through to compile the pattern fresh
1101 }
1102 }
1103 }
1104
1105 let matcher = Glob::new(pattern)
1106 .map_err(|e| EngineError::InvalidCondition {
1107 policy_id: policy_id.to_string(),
1108 reason: format!("Invalid glob pattern '{pattern}': {e}"),
1109 })?
1110 .compile_matcher();
1111 let is_match = matcher.is_match(input);
1112
1113 // SECURITY: On poisoned write lock, skip cache insertion rather than
1114 // writing into potentially corrupted state. The result is still correct,
1115 // just not cached.
1116 let cache_write = self.glob_matcher_cache.write();
1117 let mut cache = match cache_write {
1118 Ok(guard) => guard,
1119 Err(e) => {
1120 tracing::warn!(
1121 "glob_matcher_cache write lock poisoned, skipping cache insert: {}",
1122 e
1123 );
1124 return Ok(is_match);
1125 }
1126 };
1127 // FIND-R58-ENG-011: Full cache.clear() can cause a thundering herd of
1128 // recompilation on the legacy (non-precompiled) path. For production,
1129 // use with_policies() to pre-compile patterns and avoid this cache entirely.
1130 if cache.len() >= MAX_GLOB_MATCHER_CACHE_ENTRIES {
1131 // SECURITY (P3-ENG-004): Warn on cache eviction so cache thrashing is
1132 // observable in logs. This indicates a policy set with more unique glob
1133 // patterns than MAX_GLOB_MATCHER_CACHE_ENTRIES, which causes repeated
1134 // recompilation and may indicate a misconfiguration or DoS attempt.
1135 tracing::warn!(
1136 capacity = MAX_GLOB_MATCHER_CACHE_ENTRIES,
1137 "glob_matcher_cache capacity exceeded — clearing cache (cache thrashing possible; prefer with_policies() to pre-compile patterns)"
1138 );
1139 cache.clear();
1140 }
1141 cache.insert(pattern.to_string(), matcher);
1142
1143 Ok(is_match)
1144 }
1145
1146 /// Retrieve a parameter value by dot-separated path.
1147 ///
1148 /// Supports both simple keys (`"path"`) and nested paths (`"config.output.path"`).
1149 ///
1150 /// **Resolution order** (Exploit #5 fix): When the path contains dots, the function
1151 /// checks both an exact key match (e.g., `params["config.path"]`) and dot-split
1152 /// traversal (e.g., `params["config"]["path"]`).
1153 ///
1154 /// **Ambiguity handling (fail-closed):** If both interpretations resolve to different
1155 /// values, the function returns `None`. This prevents an attacker from shadowing a
1156 /// nested value with a literal dotted key (or vice versa). The `None` triggers
1157 /// deny behavior through the constraint's `on_missing` handling.
1158 ///
1159 /// When only one interpretation resolves, that value is returned.
1160 /// When both resolve to the same value, that value is returned.
1161 ///
1162 /// IMPROVEMENT_PLAN 4.1: Also supports bracket notation for array access:
1163 /// - `items[0]` — access first element of array "items"
1164 /// - `config.items[0].path` — traverse nested path with array access
1165 /// - `matrix[0][1]` — multi-dimensional array access
1166 pub fn get_param_by_path<'a>(
1167 params: &'a serde_json::Value,
1168 path: &str,
1169 ) -> Option<&'a serde_json::Value> {
1170 let exact_match = params.get(path);
1171
1172 // For non-dotted paths without brackets, exact match is the only interpretation
1173 if !path.contains('.') && !path.contains('[') {
1174 return exact_match;
1175 }
1176
1177 // Try dot-split traversal for nested objects with bracket notation support
1178 let traversal_match = Self::traverse_path(params, path);
1179
1180 match (exact_match, traversal_match) {
1181 // Both exist but differ: ambiguous — fail-closed (return None)
1182 (Some(exact), Some(traversal)) if exact != traversal => None,
1183 // Both exist and are equal: no ambiguity
1184 (Some(exact), Some(_)) => Some(exact),
1185 // Only one interpretation resolves
1186 (Some(exact), None) => Some(exact),
1187 (None, Some(traversal)) => Some(traversal),
1188 (None, None) => None,
1189 }
1190 }
1191
1192 /// Traverse a JSON value using a path with dot notation and bracket notation.
1193 ///
1194 /// Supports:
1195 /// - `foo.bar` — nested object access
1196 /// - `items[0]` — array index access
1197 /// - `foo.items[0].bar` — mixed traversal
1198 /// - `matrix[0][1]` — consecutive array access
1199 fn traverse_path<'a>(
1200 params: &'a serde_json::Value,
1201 path: &str,
1202 ) -> Option<&'a serde_json::Value> {
1203 let mut current = params;
1204
1205 // Split by dots first, then handle bracket notation within each segment
1206 for segment in path.split('.') {
1207 if segment.is_empty() {
1208 continue;
1209 }
1210
1211 // Check for bracket notation: field[index] or just [index]
1212 if let Some(bracket_pos) = segment.find('[') {
1213 // Get the field name before the bracket (may be empty for [0][1] style)
1214 let field_name = &segment[..bracket_pos];
1215
1216 // If there's a field name, traverse into it first
1217 if !field_name.is_empty() {
1218 current = current.get(field_name)?;
1219 }
1220
1221 // Parse all bracket indices in this segment: [0][1][2]...
1222 let mut rest = &segment[bracket_pos..];
1223 while rest.starts_with('[') {
1224 let close_pos = rest.find(']')?;
1225 let index_str = &rest[1..close_pos];
1226 let index: usize = index_str.parse().ok()?;
1227
1228 // Access array element
1229 current = current.get(index)?;
1230
1231 // Move past this bracket pair
1232 rest = &rest[close_pos + 1..];
1233 }
1234
1235 // If there's remaining content after brackets, it's malformed
1236 if !rest.is_empty() {
1237 return None;
1238 }
1239 } else {
1240 // Simple field access
1241 current = current.get(segment)?;
1242 }
1243 }
1244
1245 Some(current)
1246 }
1247
/// Maximum number of string values to collect during recursive parameter scanning.
/// Prevents DoS from parameters with thousands of nested string values.
///
/// `collect_all_string_values` stops and reports truncation once this many
/// values have been gathered and work still remains.
const MAX_SCAN_VALUES: usize = 500;

/// Maximum nesting depth for recursive parameter scanning.
///
/// 32 levels is sufficient for any reasonable MCP tool parameter structure
/// (typical JSON has 3-5 levels; 32 provides ample headroom). Objects or
/// arrays nested beyond this depth are silently skipped — their string
/// values will not be collected for constraint evaluation or DLP scanning.
/// This prevents stack/memory exhaustion from attacker-crafted deeply nested JSON.
const MAX_JSON_DEPTH: usize = 32;

/// Maximum work stack size for iterative JSON traversal.
///
/// SECURITY (FIND-R168-003): Caps the iterative traversal stack to prevent
/// transient memory spikes from flat JSON objects/arrays with many children.
/// Without this, a 1MB JSON with 100K keys at depth 0 would push all 100K
/// items before the depth/results checks trigger.
///
/// The cap is checked before each child is pushed; once it is reached, the
/// remaining children of that container are not pushed.
const MAX_STACK_SIZE: usize = 10_000;
1268
1269 /// Recursively collect all string values from a JSON structure.
1270 ///
1271 /// Returns a list of `(path, value)` pairs where `path` is a dot-separated
1272 /// description of where the value was found (e.g., `"options.target"`), and
1273 /// a boolean indicating whether results were truncated at [`MAX_SCAN_VALUES`].
1274 /// Uses an iterative approach to avoid stack overflow on deep JSON.
1275 ///
1276 /// Bounded by [`MAX_SCAN_VALUES`] total values and [`MAX_JSON_DEPTH`] nesting depth.
1277 ///
1278 /// SECURITY (R234-ENG-4): Returns truncation flag so callers can fail-closed
1279 /// when the parameter space exceeds scan capacity.
1280 fn collect_all_string_values(params: &serde_json::Value) -> (Vec<(String, &str)>, bool) {
1281 // Pre-allocate for typical parameter sizes; bounded by MAX_SCAN_VALUES
1282 let mut results = Vec::with_capacity(16);
1283 let mut truncated = false;
1284 // Stack: (value, current_path, depth)
1285 let mut stack: Vec<(&serde_json::Value, String, usize)> = vec![(params, String::new(), 0)];
1286
1287 while let Some((val, path, depth)) = stack.pop() {
1288 if results.len() >= Self::MAX_SCAN_VALUES {
1289 truncated = true;
1290 break;
1291 }
1292 match val {
1293 serde_json::Value::String(s) => {
1294 if !path.is_empty() {
1295 results.push((path, s.as_str()));
1296 }
1297 }
1298 serde_json::Value::Object(obj) => {
1299 if depth >= Self::MAX_JSON_DEPTH {
1300 continue;
1301 }
1302 for (key, child) in obj {
1303 // SECURITY (FIND-R168-003): Bound stack inside push loop.
1304 if stack.len() >= Self::MAX_STACK_SIZE {
1305 break;
1306 }
1307 let child_path = if path.is_empty() {
1308 key.clone()
1309 } else {
1310 let mut p = String::with_capacity(path.len() + 1 + key.len());
1311 p.push_str(&path);
1312 p.push('.');
1313 p.push_str(key);
1314 p
1315 };
1316 stack.push((child, child_path, depth + 1));
1317 }
1318 }
1319 serde_json::Value::Array(arr) => {
1320 if depth >= Self::MAX_JSON_DEPTH {
1321 continue;
1322 }
1323 for (i, child) in arr.iter().enumerate() {
1324 if stack.len() >= Self::MAX_STACK_SIZE {
1325 break;
1326 }
1327 let child_path = if path.is_empty() {
1328 format!("[{i}]")
1329 } else {
1330 format!("{path}[{i}]")
1331 };
1332 stack.push((child, child_path, depth + 1));
1333 }
1334 }
1335 _ => {}
1336 }
1337 }
1338
1339 (results, truncated)
1340 }
1341
1342 /// Convert an `on_match` action string into a Verdict.
1343 fn make_constraint_verdict(on_match: &str, reason: &str) -> Result<Verdict, EngineError> {
1344 match on_match {
1345 "deny" => Ok(Verdict::Deny {
1346 reason: reason.to_string(),
1347 }),
1348 "require_approval" => Ok(Verdict::RequireApproval {
1349 reason: reason.to_string(),
1350 }),
1351 "allow" => Ok(Verdict::Allow),
1352 other => Err(EngineError::EvaluationError(format!(
1353 "Unknown on_match action: '{other}'"
1354 ))),
1355 }
1356 }
1357 /// Returns true if any compiled policy has IP rules configured.
1358 ///
1359 /// Used by proxy layers to skip DNS resolution when no policies require it.
1360 pub fn has_ip_rules(&self) -> bool {
1361 self.compiled_policies
1362 .iter()
1363 .any(|cp| cp.compiled_ip_rules.is_some())
1364 }
1365}
1366
// Test module: compiled only under `cfg(test)` and loaded from
// `engine_tests.rs` (via `#[path]`) instead of the default `tests.rs`.
#[cfg(test)]
#[allow(deprecated)] // evaluate_action_with_context: migration tracked in FIND-CREATIVE-005
#[path = "engine_tests.rs"]
mod tests;