vellaveto_engine/lib.rs
1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Policy evaluation engine for the Vellaveto MCP tool firewall.
9//!
10//! Evaluates [`Action`](vellaveto_types::core::Action) requests against
11//! configured [`Policy`](vellaveto_types::core::Policy) rules and returns a
12//! [`Verdict`](vellaveto_types::core::Verdict) (Allow, Deny, or RequireApproval).
13//! Supports glob/regex path matching, domain/IP rules, ABAC attribute constraints,
14//! call-chain validation, decision caching (LRU+TTL), and Wasm policy plugins.
15//!
16//! The engine is synchronous by design — all evaluation completes in <5ms P99.
17
18pub mod abac;
19pub mod acis;
20pub mod adaptive_rate;
21pub mod behavioral;
22pub mod cache;
23pub mod cascading;
24pub mod circuit_breaker;
25pub mod collusion;
26mod compiled;
27mod constraint_eval;
28pub mod contagion;
29mod context_check;
30pub mod coverage;
31pub mod delegation;
32pub mod deputy;
33
34#[cfg(test)]
35mod channel_separation_tests;
36mod domain;
37mod entropy_gate;
38mod error;
39pub mod impact;
40mod ip;
41pub mod least_agency;
42mod legacy;
43pub mod lint;
44mod matcher;
45mod normalize;
46mod path;
47mod policy_compile;
48mod rule_check;
49pub mod sequence;
50mod traced;
51mod verified_capability_context;
52mod verified_capability_delegation_context;
53pub mod verified_constraint_eval;
54mod verified_context_delegation;
55pub mod verified_core;
56mod verified_deputy;
57mod verified_entropy_gate;
58pub mod wasm_plugin;
59
60#[cfg(kani)]
61mod kani_proofs;
62
63pub use compiled::{
64 CompiledConstraint, CompiledContextCondition, CompiledIpRules, CompiledNetworkRules,
65 CompiledPathRules, CompiledPolicy,
66};
67pub use error::{EngineError, PolicyValidationError};
68pub use matcher::{CompiledToolMatcher, PatternMatcher};
69pub use path::DEFAULT_MAX_PATH_DECODE_ITERATIONS;
70
71use vellaveto_types::{
72 Action, ActionSummary, EvaluationContext, EvaluationTrace, Policy, PolicyType, Verdict,
73};
74
75use globset::{Glob, GlobMatcher};
76use regex::Regex;
77use std::collections::HashMap;
78use std::sync::RwLock;
79
/// Upper bound on how many compiled glob matchers the legacy runtime cache
/// is allowed to retain.
const MAX_GLOB_MATCHER_CACHE_ENTRIES: usize = 2 * 1024;
/// Upper bound on how many domain-normalization results the runtime cache
/// is allowed to retain.
///
/// The cache starts empty and no evaluation path populates it today (domain
/// normalization happens inline), so nothing references this constant yet.
/// It stays as the documented eviction cap for the `domain_norm_cache`
/// field, so that any future population path already has a bound to enforce.
#[allow(dead_code)] // unreferenced until the domain cache gains a population path
const MAX_DOMAIN_NORM_CACHE_ENTRIES: usize = 4 * 1024;
90
91/// The core policy evaluation engine.
92///
93/// Evaluates [`Action`]s against a set of [`Policy`] rules to produce a [`Verdict`].
94///
95/// # Security Model
96///
97/// - **Fail-closed**: An empty policy set produces `Verdict::Deny`.
98/// - **Priority ordering**: Higher-priority policies are evaluated first.
99/// - **Pattern matching**: Policy IDs use `"tool:function"` convention with wildcard support.
100pub struct PolicyEngine {
101 strict_mode: bool,
102 compiled_policies: Vec<CompiledPolicy>,
103 /// Maps exact tool names to sorted indices in `compiled_policies`.
104 /// Only policies with an exact tool name pattern are indexed here.
105 tool_index: HashMap<String, Vec<usize>>,
106 /// Indices of policies that cannot be indexed by tool name
107 /// (Universal, prefix, suffix, or Any tool patterns).
108 /// Already sorted by position in `compiled_policies` (= priority order).
109 always_check: Vec<usize>,
110 /// When false (default), time-window context conditions always use wall-clock
111 /// time. When true, the engine honors `EvaluationContext.timestamp` from the
112 /// caller. **Only enable for deterministic testing** — in production, a client
113 /// could supply a fake timestamp to bypass time-window policies.
114 trust_context_timestamps: bool,
115 /// Maximum percent-decoding iterations in `normalize_path` before
116 /// fail-closing to `"/"`. Defaults to [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20).
117 max_path_decode_iterations: u32,
118 /// Legacy runtime cache for glob matcher compilation.
119 ///
120 /// This cache is used by `glob_is_match` on the non-precompiled path.
121 glob_matcher_cache: RwLock<HashMap<String, GlobMatcher>>,
122 /// Runtime cache for domain normalization results.
123 ///
124 /// Caches both successful normalization (Some) and invalid domains (None)
125 /// to avoid repeated IDNA parsing on hot network/domain constraint paths.
126 ///
127 /// SECURITY (FIND-R46-003): Bounded to [`MAX_DOMAIN_NORM_CACHE_ENTRIES`].
128 /// When capacity is exceeded, the cache is cleared to prevent unbounded
129 /// memory growth from attacker-controlled domain strings. Currently this
130 /// cache is not actively populated — domain normalization is done inline
131 /// via [`domain::normalize_domain_for_match`]. The eviction guard exists
132 /// as a defense-in-depth measure for future caching additions.
133 domain_norm_cache: RwLock<HashMap<String, Option<String>>>,
134 /// Optional topology guard for pre-policy tool call filtering.
135 /// When set, tool calls are checked against the live topology graph
136 /// before policy evaluation. Unknown tools may be denied or trigger
137 /// a re-crawl depending on configuration.
138 ///
139 /// Only available when the `discovery` feature is enabled.
140 #[cfg(feature = "discovery")]
141 topology_guard: Option<std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>>,
142}
143
144impl std::fmt::Debug for PolicyEngine {
145 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146 let mut s = f.debug_struct("PolicyEngine");
147 s.field("strict_mode", &self.strict_mode)
148 .field("compiled_policies_count", &self.compiled_policies.len())
149 .field("indexed_tools", &self.tool_index.len())
150 .field("always_check_count", &self.always_check.len())
151 .field(
152 "max_path_decode_iterations",
153 &self.max_path_decode_iterations,
154 )
155 .field(
156 "glob_matcher_cache_size",
157 &self
158 .glob_matcher_cache
159 .read()
160 .map(|c| c.len())
161 .unwrap_or_default(),
162 )
163 .field(
164 "domain_norm_cache_size",
165 &self
166 .domain_norm_cache
167 .read()
168 .map(|c| c.len())
169 .unwrap_or_default(),
170 );
171 #[cfg(feature = "discovery")]
172 {
173 s.field("topology_guard", &self.topology_guard.is_some());
174 }
175 s.finish()
176 }
177}
178
179impl PolicyEngine {
180 /// Create a new policy engine.
181 ///
182 /// When `strict_mode` is true, the engine applies stricter validation
183 /// on conditions and parameters.
184 pub fn new(strict_mode: bool) -> Self {
185 Self {
186 strict_mode,
187 compiled_policies: Vec::new(),
188 tool_index: HashMap::new(),
189 always_check: Vec::new(),
190 trust_context_timestamps: false,
191 max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
192 glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
193 // IMP-R208-001: Zero initial capacity — cache not actively populated.
194 domain_norm_cache: RwLock::new(HashMap::new()),
195 #[cfg(feature = "discovery")]
196 topology_guard: None,
197 }
198 }
199
200 /// Returns the engine's strict_mode setting.
201 pub fn strict_mode(&self) -> bool {
202 self.strict_mode
203 }
204
205 /// Validate a domain pattern used in network_rules.
206 ///
207 /// Rules per RFC 1035:
208 /// - Labels (parts between dots) must be 1-63 characters each
209 /// - Each label must be alphanumeric + hyphen only (no leading/trailing hyphen)
210 /// - Total domain length max 253 characters
211 /// - Wildcard `*.` prefix is allowed (only at the beginning)
212 /// - Empty string is rejected
213 ///
214 /// See the internal `domain::validate_domain_pattern` function for details.
215 pub fn validate_domain_pattern(pattern: &str) -> Result<(), String> {
216 domain::validate_domain_pattern(pattern)
217 }
218
219 /// Create a new policy engine with pre-compiled policies.
220 ///
221 /// All regex and glob patterns are compiled at construction time.
222 /// Invalid patterns cause immediate rejection with descriptive errors.
223 /// The compiled policies are sorted by priority (highest first, deny-overrides).
224 pub fn with_policies(
225 strict_mode: bool,
226 policies: &[Policy],
227 ) -> Result<Self, Vec<PolicyValidationError>> {
228 let compiled = Self::compile_policies(policies, strict_mode)?;
229 let (tool_index, always_check) = Self::build_tool_index(&compiled);
230 Ok(Self {
231 strict_mode,
232 compiled_policies: compiled,
233 tool_index,
234 always_check,
235 trust_context_timestamps: false,
236 max_path_decode_iterations: DEFAULT_MAX_PATH_DECODE_ITERATIONS,
237 glob_matcher_cache: RwLock::new(HashMap::with_capacity(256)),
238 // IMP-R208-001: Zero initial capacity — cache not actively populated.
239 domain_norm_cache: RwLock::new(HashMap::new()),
240 #[cfg(feature = "discovery")]
241 topology_guard: None,
242 })
243 }
244
/// Choose whether time-window checks honor `EvaluationContext.timestamp`.
///
/// **WARNING:** intended for deterministic testing only (the method exists
/// solely in test builds). In production a client could supply a forged
/// timestamp to bypass time-window policies.
#[cfg(test)]
pub fn set_trust_context_timestamps(&mut self, trust: bool) {
    let flag = &mut self.trust_context_timestamps;
    *flag = trust;
}
253
/// Install the topology guard used to pre-filter tool calls.
///
/// Once installed, `evaluate_action` consults the topology graph before any
/// policy is evaluated. A tool the guard reports as unknown or ambiguous
/// yields `Verdict::Deny` with a topology-specific reason; `Known` and
/// `Bypassed` results let evaluation proceed. Any previously installed
/// guard is replaced.
#[cfg(feature = "discovery")]
pub fn set_topology_guard(
    &mut self,
    guard: std::sync::Arc<vellaveto_discovery::guard::TopologyGuard>,
) {
    let installed = Some(guard);
    self.topology_guard = installed;
}
266
/// Consult the topology guard, if one is installed, ahead of policy
/// evaluation.
///
/// Yields `Some(Verdict::Deny)` when the guard reports the tool as unknown
/// or ambiguous. Yields `None` — meaning "carry on with normal policy
/// evaluation" — when no guard is installed or the guard answers `Known`
/// or `Bypassed`.
#[cfg(feature = "discovery")]
fn check_topology(&self, action: &Action) -> Option<Verdict> {
    use vellaveto_discovery::guard::TopologyVerdict;

    let tool_name = &action.tool;
    // `?` exits with `None` when no guard has been installed.
    let reason = match self.topology_guard.as_ref()?.check(tool_name) {
        TopologyVerdict::Known { .. } | TopologyVerdict::Bypassed => return None,
        TopologyVerdict::Unknown { suggestion, .. } => match suggestion {
            Some(closest) => format!(
                "Tool '{tool_name}' not found in topology graph (did you mean '{closest}'?)"
            ),
            None => format!("Tool '{tool_name}' not found in topology graph"),
        },
        TopologyVerdict::Ambiguous { matches, .. } => format!(
            "Tool '{}' is ambiguous — matches servers: {}. Use qualified name (server::tool).",
            tool_name,
            matches.join(", ")
        ),
    };
    Some(Verdict::Deny { reason })
}
300
301 /// Set the maximum percent-decoding iterations for path normalization.
302 ///
303 /// Paths requiring more iterations fail-closed to `"/"`. The default is
304 /// [`DEFAULT_MAX_PATH_DECODE_ITERATIONS`] (20). A value of 0 disables
305 /// iterative decoding entirely (single pass only).
306 pub fn set_max_path_decode_iterations(&mut self, max: u32) {
307 self.max_path_decode_iterations = max;
308 }
309
310 /// Build a tool-name index for O(matching) evaluation.
311 fn build_tool_index(compiled: &[CompiledPolicy]) -> (HashMap<String, Vec<usize>>, Vec<usize>) {
312 let mut index: HashMap<String, Vec<usize>> = HashMap::with_capacity(compiled.len());
313 let mut always_check = Vec::with_capacity(compiled.len());
314 for (i, cp) in compiled.iter().enumerate() {
315 match &cp.tool_matcher {
316 CompiledToolMatcher::Universal => always_check.push(i),
317 CompiledToolMatcher::ToolOnly(PatternMatcher::Exact(name)) => {
318 index.entry(name.clone()).or_default().push(i);
319 }
320 CompiledToolMatcher::ToolAndFunction(PatternMatcher::Exact(name), _) => {
321 index.entry(name.clone()).or_default().push(i);
322 }
323 _ => always_check.push(i),
324 }
325 }
326 // SECURITY (FIND-R49-003): Assert sorted invariant in debug builds.
327 // The always_check list must be sorted by index for deterministic evaluation order.
328 // Tool index values must also be sorted per-key for the same reason.
329 debug_assert!(
330 always_check.windows(2).all(|w| w[0] < w[1]),
331 "always_check must be sorted"
332 );
333 debug_assert!(
334 index.values().all(|v| v.windows(2).all(|w| w[0] < w[1])),
335 "tool_index values must be sorted"
336 );
337 (index, always_check)
338 }
339
340 /// Sort policies by priority (highest first), with deny-overrides at equal priority,
341 /// and a stable tertiary tiebreaker by policy ID for deterministic ordering.
342 ///
343 /// Call this once when loading or modifying policies, then pass the sorted
344 /// slice to [`Self::evaluate_action`] to avoid re-sorting on every evaluation.
345 pub fn sort_policies(policies: &mut [Policy]) {
346 policies.sort_by(|a, b| {
347 let pri = b.priority.cmp(&a.priority);
348 if pri != std::cmp::Ordering::Equal {
349 return pri;
350 }
351 let a_deny = matches!(a.policy_type, PolicyType::Deny);
352 let b_deny = matches!(b.policy_type, PolicyType::Deny);
353 let deny_ord = b_deny.cmp(&a_deny);
354 if deny_ord != std::cmp::Ordering::Equal {
355 return deny_ord;
356 }
357 // Tertiary tiebreaker: lexicographic by ID for deterministic ordering
358 a.id.cmp(&b.id)
359 });
360 }
361
362 // VERIFIED [S1]: Deny-by-default — empty policy set produces Deny (MCPPolicyEngine.tla S1)
363 // VERIFIED [S2]: Priority ordering — higher priority wins (MCPPolicyEngine.tla S2)
364 // VERIFIED [S3]: Deny-overrides — Deny beats Allow at same priority (MCPPolicyEngine.tla S3)
365 // VERIFIED [S5]: Errors produce Deny — every Allow verdict has a matching Allow policy (MCPPolicyEngine.tla S5)
366 // VERIFIED [L1]: Progress — every action gets a verdict (MCPPolicyEngine.tla L1)
367 /// Evaluate an action against a set of policies.
368 ///
369 /// For best performance, pass policies that have been pre-sorted with
370 /// [`Self::sort_policies`]. If not pre-sorted, this method will sort a temporary
371 /// copy (which adds O(n log n) overhead per call).
372 ///
373 /// The first matching policy determines the verdict.
374 /// If no policy matches, the default is Deny (fail-closed).
375 #[must_use = "security verdicts must not be discarded"]
376 pub fn evaluate_action(
377 &self,
378 action: &Action,
379 policies: &[Policy],
380 ) -> Result<Verdict, EngineError> {
381 // Topology pre-filter: check if the tool exists in the topology graph.
382 // Unknown/ambiguous tools are denied before policy evaluation.
383 #[cfg(feature = "discovery")]
384 if let Some(deny) = self.check_topology(action) {
385 return Ok(deny);
386 }
387
388 // Fast path: use pre-compiled policies (zero Mutex, zero runtime compilation)
389 if !self.compiled_policies.is_empty() {
390 return self.evaluate_with_compiled(action);
391 }
392
393 // Legacy path: evaluate ad-hoc policies (compiles patterns on the fly)
394 if policies.is_empty() {
395 return Ok(Verdict::Deny {
396 reason: "No policies defined".to_string(),
397 });
398 }
399
400 // Check if already sorted (by priority desc, deny-first at equal priority,
401 // then by ID ascending as a tiebreaker — FIND-R44-057)
402 let is_sorted = policies.windows(2).all(|w| {
403 let pri = w[0].priority.cmp(&w[1].priority);
404 if pri == std::cmp::Ordering::Equal {
405 let a_deny = matches!(w[0].policy_type, PolicyType::Deny);
406 let b_deny = matches!(w[1].policy_type, PolicyType::Deny);
407 if a_deny == b_deny {
408 // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
409 w[0].id.cmp(&w[1].id) != std::cmp::Ordering::Greater
410 } else {
411 b_deny <= a_deny
412 }
413 } else {
414 pri != std::cmp::Ordering::Less
415 }
416 });
417
418 if is_sorted {
419 for policy in policies {
420 if self.matches_action(action, policy) {
421 if let Some(verdict) = self.apply_policy(action, policy)? {
422 return Ok(verdict);
423 }
424 // None: on_no_match="continue", try next policy
425 }
426 }
427 } else {
428 let mut sorted: Vec<&Policy> = policies.iter().collect();
429 sorted.sort_by(|a, b| {
430 let pri = b.priority.cmp(&a.priority);
431 if pri != std::cmp::Ordering::Equal {
432 return pri;
433 }
434 let a_deny = matches!(a.policy_type, PolicyType::Deny);
435 let b_deny = matches!(b.policy_type, PolicyType::Deny);
436 let deny_cmp = b_deny.cmp(&a_deny);
437 if deny_cmp != std::cmp::Ordering::Equal {
438 return deny_cmp;
439 }
440 // FIND-R44-057: Tertiary tiebreaker by ID for deterministic ordering
441 a.id.cmp(&b.id)
442 });
443 for policy in &sorted {
444 if self.matches_action(action, policy) {
445 if let Some(verdict) = self.apply_policy(action, policy)? {
446 return Ok(verdict);
447 }
448 // None: on_no_match="continue", try next policy
449 }
450 }
451 }
452
453 Ok(Verdict::Deny {
454 reason: "No matching policy".to_string(),
455 })
456 }
457
458 /// Evaluate an action with optional session context.
459 ///
460 /// This is the context-aware counterpart to [`Self::evaluate_action`].
461 /// When `context` is `Some`, context conditions (time windows, call limits,
462 /// agent identity, action history) are evaluated. When `None`, behaves
463 /// identically to `evaluate_action`.
464 ///
465 /// # WARNING: `policies` parameter ignored when compiled policies exist
466 ///
467 /// When the engine was constructed with [`Self::with_policies`] (or any
468 /// builder that populates `compiled_policies`), the `policies` parameter
469 /// is **completely ignored**. The engine uses its pre-compiled policy set
470 /// instead.
471 #[deprecated(
472 since = "4.0.1",
473 note = "policies parameter is silently ignored when compiled policies exist. \
474 Use evaluate_action() for compiled engines or build a new engine \
475 with with_policies() for dynamic policy sets."
476 )]
477 #[must_use = "security verdicts must not be discarded"]
478 pub fn evaluate_action_with_context(
479 &self,
480 action: &Action,
481 policies: &[Policy],
482 context: Option<&EvaluationContext>,
483 ) -> Result<Verdict, EngineError> {
484 #[cfg(feature = "discovery")]
485 if let Some(deny) = self.check_topology(action) {
486 return Ok(deny);
487 }
488 if let Some(ctx) = context {
489 if let Err(reason) = ctx.validate() {
490 return Ok(Verdict::Deny { reason });
491 }
492 }
493 if context.is_none() {
494 return self.evaluate_action(action, policies);
495 }
496 if !self.compiled_policies.is_empty() {
497 return self.evaluate_with_compiled_ctx(action, context);
498 }
499 if let Some(ctx) = context {
500 if ctx.has_any_meaningful_fields() {
501 return Ok(Verdict::Deny {
502 reason: "Policy engine has no compiled policies; \
503 context conditions cannot be evaluated (fail-closed)"
504 .to_string(),
505 });
506 }
507 }
508 self.evaluate_action(action, policies)
509 }
510
511 /// Evaluate an action with optional session context, returning only the verdict.
512 ///
513 /// This is the context-aware counterpart to [`Self::evaluate_action`].
514 /// When `context` is `Some`, context conditions (time windows, call limits,
515 /// agent identity, action history) are evaluated. When `None`, behaves
516 /// identically to `evaluate_action`.
517 ///
518 /// For the full decision trace, use [`Self::evaluate_action_traced_with_context`].
519 #[must_use = "security verdicts must not be discarded"]
520 pub fn evaluate_with_context(
521 &self,
522 action: &Action,
523 context: Option<&EvaluationContext>,
524 ) -> Result<Verdict, EngineError> {
525 self.evaluate_action_traced_with_context(action, context)
526 .map(|(verdict, _trace)| verdict)
527 }
528
529 /// Evaluate an action with full decision trace and optional session context.
530 #[must_use = "security verdicts must not be discarded"]
531 pub fn evaluate_action_traced_with_context(
532 &self,
533 action: &Action,
534 context: Option<&EvaluationContext>,
535 ) -> Result<(Verdict, EvaluationTrace), EngineError> {
536 // Topology pre-filter: check if the tool exists in the topology graph.
537 #[cfg(feature = "discovery")]
538 if let Some(deny) = self.check_topology(action) {
539 let param_keys: Vec<String> = action
540 .parameters
541 .as_object()
542 .map(|o| o.keys().cloned().collect::<Vec<String>>())
543 .unwrap_or_default();
544 let trace = EvaluationTrace {
545 action_summary: ActionSummary {
546 tool: action.tool.clone(),
547 function: action.function.clone(),
548 param_count: param_keys.len(),
549 param_keys,
550 },
551 policies_checked: 0,
552 policies_matched: 0,
553 matches: vec![],
554 verdict: deny.clone(),
555 duration_us: 0,
556 };
557 return Ok((deny, trace));
558 }
559
560 // SECURITY (FIND-R50-063): Validate context bounds before evaluation.
561 if let Some(ctx) = context {
562 if let Err(reason) = ctx.validate() {
563 let deny = Verdict::Deny {
564 reason: reason.clone(),
565 };
566 let param_keys: Vec<String> = action
567 .parameters
568 .as_object()
569 .map(|o| o.keys().cloned().collect::<Vec<String>>())
570 .unwrap_or_default();
571 let trace = EvaluationTrace {
572 action_summary: ActionSummary {
573 tool: action.tool.clone(),
574 function: action.function.clone(),
575 param_count: param_keys.len(),
576 param_keys,
577 },
578 policies_checked: 0,
579 policies_matched: 0,
580 matches: vec![],
581 verdict: deny.clone(),
582 duration_us: 0,
583 };
584 return Ok((deny, trace));
585 }
586 }
587 if context.is_none() {
588 return self.evaluate_action_traced(action);
589 }
590 // Traced context-aware path
591 self.evaluate_action_traced_ctx(action, context)
592 }
593
594 // ═══════════════════════════════════════════════════
595 // COMPILED EVALUATION PATH (zero Mutex, zero runtime compilation)
596 // ═══════════════════════════════════════════════════
597
598 /// Evaluate an action using pre-compiled policies. Zero Mutex acquisitions.
599 /// Compiled policies are already sorted at compile time.
600 ///
601 /// Uses the tool-name index when available: only checks policies whose tool
602 /// pattern could match `action.tool`, plus `always_check` (wildcard/prefix/suffix).
603 /// Falls back to linear scan when no index has been built.
604 fn evaluate_with_compiled(&self, action: &Action) -> Result<Verdict, EngineError> {
605 // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
606 // the full pipeline (NFKC + lowercase + homoglyph) before policy matching.
607 // This prevents fullwidth Unicode, circled letters (Ⓐ), and mathematical
608 // variants from bypassing exact-match Deny policies. Patterns are also
609 // normalized via normalize_full at compile time for consistency.
610 let norm_tool = crate::normalize::normalize_full(&action.tool);
611 let norm_func = crate::normalize::normalize_full(&action.function);
612
613 // If index was built, use it for O(matching) instead of O(all)
614 if !self.tool_index.is_empty() || !self.always_check.is_empty() {
615 let tool_specific = self.tool_index.get(&norm_tool);
616 let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
617 let always_slice = &self.always_check;
618
619 // Merge two sorted index slices, iterating in priority order.
620 // SECURITY (R26-ENG-1): When both slices reference the same policy index,
621 // increment BOTH pointers to avoid evaluating the policy twice.
622 let mut ti = 0;
623 let mut ai = 0;
624 loop {
625 let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
626 (Some(&t), Some(&a)) => {
627 if t < a {
628 ti += 1;
629 t
630 } else if t > a {
631 ai += 1;
632 a
633 } else {
634 // t == a: same policy in both slices, skip duplicate
635 ti += 1;
636 ai += 1;
637 t
638 }
639 }
640 (Some(&t), None) => {
641 ti += 1;
642 t
643 }
644 (None, Some(&a)) => {
645 ai += 1;
646 a
647 }
648 (None, None) => break,
649 };
650
651 let cp = &self.compiled_policies[next_idx];
652 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
653 if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
654 return Ok(verdict);
655 }
656 // None: on_no_match="continue", try next policy
657 }
658 }
659 } else {
660 // No index: linear scan (legacy compiled path)
661 for cp in &self.compiled_policies {
662 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
663 if let Some(verdict) = self.apply_compiled_policy(action, cp)? {
664 return Ok(verdict);
665 }
666 // None: on_no_match="continue", try next policy
667 }
668 }
669 }
670
671 Ok(Verdict::Deny {
672 reason: "No matching policy".to_string(),
673 })
674 }
675
676 /// Evaluate with compiled policies and session context.
677 fn evaluate_with_compiled_ctx(
678 &self,
679 action: &Action,
680 context: Option<&EvaluationContext>,
681 ) -> Result<Verdict, EngineError> {
682 // SECURITY (FIND-SEM-003, R227-TYP-1): Normalize tool/function names through
683 // the full pipeline (same as evaluate_with_compiled).
684 let norm_tool = crate::normalize::normalize_full(&action.tool);
685 let norm_func = crate::normalize::normalize_full(&action.function);
686
687 if !self.tool_index.is_empty() || !self.always_check.is_empty() {
688 let tool_specific = self.tool_index.get(&norm_tool);
689 let tool_slice = tool_specific.map_or(&[][..], |v| v.as_slice());
690 let always_slice = &self.always_check;
691
692 // SECURITY (R26-ENG-1): Deduplicate merge — see evaluate_compiled().
693 let mut ti = 0;
694 let mut ai = 0;
695 loop {
696 let next_idx = match (tool_slice.get(ti), always_slice.get(ai)) {
697 (Some(&t), Some(&a)) => {
698 if t < a {
699 ti += 1;
700 t
701 } else if t > a {
702 ai += 1;
703 a
704 } else {
705 ti += 1;
706 ai += 1;
707 t
708 }
709 }
710 (Some(&t), None) => {
711 ti += 1;
712 t
713 }
714 (None, Some(&a)) => {
715 ai += 1;
716 a
717 }
718 (None, None) => break,
719 };
720
721 let cp = &self.compiled_policies[next_idx];
722 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
723 if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
724 return Ok(verdict);
725 }
726 }
727 }
728 } else {
729 for cp in &self.compiled_policies {
730 if cp.tool_matcher.matches_normalized(&norm_tool, &norm_func) {
731 if let Some(verdict) = self.apply_compiled_policy_ctx(action, cp, context)? {
732 return Ok(verdict);
733 }
734 }
735 }
736 }
737
738 Ok(Verdict::Deny {
739 reason: "No matching policy".to_string(),
740 })
741 }
742
743 /// Apply a matched compiled policy to produce a verdict (no context).
744 /// Returns `None` when a Conditional policy with `on_no_match: "continue"` has no
745 /// constraints fire, signaling the evaluation loop to try the next policy.
746 fn apply_compiled_policy(
747 &self,
748 action: &Action,
749 cp: &CompiledPolicy,
750 ) -> Result<Option<Verdict>, EngineError> {
751 self.apply_compiled_policy_ctx(action, cp, None)
752 }
753
754 /// Apply a matched compiled policy with optional context.
755 fn apply_compiled_policy_ctx(
756 &self,
757 action: &Action,
758 cp: &CompiledPolicy,
759 context: Option<&EvaluationContext>,
760 ) -> Result<Option<Verdict>, EngineError> {
761 // Check path rules before policy type dispatch.
762 // Blocked paths → deny immediately regardless of policy type.
763 if let Some(denial) = self.check_path_rules(action, cp) {
764 Self::debug_assert_verified_deny(cp, true, false);
765 return Ok(Some(denial));
766 }
767 // Check network rules before policy type dispatch.
768 if let Some(denial) = self.check_network_rules(action, cp) {
769 Self::debug_assert_verified_deny(cp, true, false);
770 return Ok(Some(denial));
771 }
772 // Check IP rules (DNS rebinding protection) after network rules.
773 if let Some(denial) = self.check_ip_rules(action, cp) {
774 Self::debug_assert_verified_deny(cp, true, false);
775 return Ok(Some(denial));
776 }
777 // Check context conditions (session-level) before policy type dispatch.
778 // SECURITY: If a policy declares context conditions but no context is
779 // provided, deny the action (fail-closed). Skipping would let callers
780 // bypass time-window / max-calls / agent-id restrictions by omitting context.
781 if !cp.context_conditions.is_empty() {
782 match context {
783 Some(ctx) => {
784 // SECURITY (R231-ENG-3): Normalize tool name before passing to
785 // context conditions, consistent with policy matching which uses
786 // normalize_full(). Prevents future context conditions from
787 // receiving raw attacker-controlled tool names.
788 let norm_tool = crate::normalize::normalize_full(&action.tool);
789 if let Some(denial) = self.check_context_conditions(ctx, cp, &norm_tool) {
790 Self::debug_assert_verified_deny(cp, false, true);
791 return Ok(Some(denial));
792 }
793 }
794 None => {
795 Self::debug_assert_verified_deny(cp, false, true);
796 return Ok(Some(Verdict::Deny {
797 reason: format!(
798 "Policy '{}' requires evaluation context (has {} context condition(s)) but none was provided",
799 cp.policy.name,
800 cp.context_conditions.len()
801 ),
802 }));
803 }
804 }
805 }
806
807 match &cp.policy.policy_type {
808 PolicyType::Allow => {
809 Self::debug_assert_verified_allow(cp);
810 Ok(Some(Verdict::Allow))
811 }
812 PolicyType::Deny => {
813 Self::debug_assert_verified_policy_deny(cp);
814 Ok(Some(Verdict::Deny {
815 reason: cp.deny_reason.clone(),
816 }))
817 }
818 PolicyType::Conditional { .. } => self.evaluate_compiled_conditions(action, cp),
819 // Handle future variants - fail closed (deny)
820 _ => {
821 // SECURITY (R239-XCUT-5): Genericize — policy name in debug only.
822 tracing::debug!(policy = %cp.policy.name, "Request denied (unknown policy type)");
823 Ok(Some(Verdict::Deny {
824 reason: "Request denied (unknown policy type)".to_string(),
825 }))
826 }
827 }
828 }
829
830 /// Debug-assert: verified core confirms rule/context override produces Deny.
831 #[inline]
832 fn debug_assert_verified_deny(cp: &CompiledPolicy, rule_override: bool, ctx_deny: bool) {
833 debug_assert!({
834 let rm = verified_core::ResolvedMatch {
835 matched: true,
836 is_deny: matches!(cp.policy.policy_type, PolicyType::Deny),
837 is_conditional: matches!(cp.policy.policy_type, PolicyType::Conditional { .. }),
838 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
839 rule_override_deny: rule_override,
840 context_deny: ctx_deny,
841 require_approval: false,
842 condition_fired: false,
843 condition_verdict: verified_core::VerdictKind::Deny,
844 on_no_match_continue: false,
845 all_constraints_skipped: false,
846 };
847 verified_core::compute_single_verdict(&rm)
848 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
849 });
850 }
851
852 /// Debug-assert: verified core confirms Allow policy produces Allow.
853 #[inline]
854 fn debug_assert_verified_allow(cp: &CompiledPolicy) {
855 debug_assert!({
856 let rm = verified_core::ResolvedMatch {
857 matched: true,
858 is_deny: false,
859 is_conditional: false,
860 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
861 rule_override_deny: false,
862 context_deny: false,
863 require_approval: false,
864 condition_fired: false,
865 condition_verdict: verified_core::VerdictKind::Allow,
866 on_no_match_continue: false,
867 all_constraints_skipped: false,
868 };
869 verified_core::compute_single_verdict(&rm)
870 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Allow)
871 });
872 }
873
874 /// Debug-assert: verified core confirms Deny policy produces Deny.
875 #[inline]
876 fn debug_assert_verified_policy_deny(cp: &CompiledPolicy) {
877 debug_assert!({
878 let rm = verified_core::ResolvedMatch {
879 matched: true,
880 is_deny: true,
881 is_conditional: false,
882 priority: u32::try_from(cp.policy.priority.max(0)).unwrap_or(0),
883 rule_override_deny: false,
884 context_deny: false,
885 require_approval: false,
886 condition_fired: false,
887 condition_verdict: verified_core::VerdictKind::Deny,
888 on_no_match_continue: false,
889 all_constraints_skipped: false,
890 };
891 verified_core::compute_single_verdict(&rm)
892 == verified_core::VerdictOutcome::Decided(verified_core::VerdictKind::Deny)
893 });
894 }
895
896 /// Normalize a file path: resolve `..`, `.`, reject null bytes, ensure deterministic form.
897 ///
898 /// Handles percent-encoding, null bytes, and path traversal attempts.
899 pub fn normalize_path(raw: &str) -> Result<String, EngineError> {
900 path::normalize_path(raw)
901 }
902
903 /// Normalize a file path with a configurable percent-decoding iteration limit.
904 ///
905 /// Use this variant when you need to control the maximum decode iterations
906 /// to prevent DoS from deeply nested percent-encoding.
907 pub fn normalize_path_bounded(raw: &str, max_iterations: u32) -> Result<String, EngineError> {
908 path::normalize_path_bounded(raw, max_iterations)
909 }
910
911 /// Extract the domain from a URL string.
912 ///
913 /// Returns the host portion of the URL, or the original string if parsing fails.
914 pub fn extract_domain(url: &str) -> String {
915 domain::extract_domain(url)
916 }
917
918 /// Match a domain against a pattern like `*.example.com` or `example.com`.
919 ///
920 /// Supports wildcard patterns with `*.` prefix for subdomain matching.
921 pub fn match_domain_pattern(domain_str: &str, pattern: &str) -> bool {
922 domain::match_domain_pattern(domain_str, pattern)
923 }
924
925 /// Normalize a domain for matching: lowercase, strip trailing dots, apply IDNA.
926 ///
927 /// See [`domain::normalize_domain_for_match`] for details.
928 fn normalize_domain_for_match(s: &str) -> Option<std::borrow::Cow<'_, str>> {
929 domain::normalize_domain_for_match(s)
930 }
931
/// Longest regex pattern accepted by the ReDoS safety check.
///
/// Overlength patterns are rejected up front so that pattern compilation
/// cannot be abused as a denial-of-service vector.
const MAX_REGEX_LEN: usize = 1_024;
934
935 /// Validate a regex pattern for ReDoS safety.
936 ///
937 /// Rejects patterns that are too long (>1024 chars) or contain constructs
938 /// known to cause exponential backtracking:
939 ///
940 /// 1. **Nested quantifiers** like `(a+)+`, `(a*)*`, `(a+)*`, `(a*)+`
941 /// 2. **Overlapping alternation with quantifiers** like `(a|a)+` or `(a|ab)+`
942 ///
943 /// **Known limitations (FIND-R46-007):** This is a heuristic check, not a
944 /// full NFA analysis. It does NOT detect all possible ReDoS patterns:
945 /// - Alternation with overlapping character classes (e.g., `([a-z]|[a-m])+`)
946 /// - Backreferences with quantifiers
947 /// - Lookahead/lookbehind with quantifiers
948 /// - Possessive quantifiers (these are actually safe but not recognized)
949 ///
950 /// The `regex` crate uses a DFA/NFA hybrid that is immune to most ReDoS,
951 /// but pattern compilation itself can be expensive for very complex patterns,
952 /// hence the length limit.
953 fn validate_regex_safety(pattern: &str) -> Result<(), String> {
954 if pattern.len() > Self::MAX_REGEX_LEN {
955 return Err(format!(
956 "Regex pattern exceeds maximum length of {} chars ({} chars)",
957 Self::MAX_REGEX_LEN,
958 pattern.len()
959 ));
960 }
961
962 // Detect nested quantifiers: a quantifier applied to a group that
963 // itself contains a quantifier. Simplified check for common patterns.
964 let quantifiers = ['+', '*'];
965 let mut paren_depth = 0i32;
966 let mut has_inner_quantifier = false;
967 let chars: Vec<char> = pattern.chars().collect();
968 // SECURITY (R8-5): Use a skip_next flag to correctly handle escape
969 // sequences. The previous approach checked chars[i-1] == '\\' but
970 // failed for double-escapes like `\\\\(` (literal backslash + open paren).
971 let mut skip_next = false;
972
973 // Track alternation branches within groups to detect overlapping alternation.
974 // SECURITY (FIND-R46-007): Detect `(branch1|branch2)+` where branches share
975 // a common prefix, which can cause backtracking even without nested quantifiers.
976 let mut group_has_alternation = false;
977
978 for i in 0..chars.len() {
979 if skip_next {
980 skip_next = false;
981 continue;
982 }
983 match chars[i] {
984 '\\' => {
985 // Skip the NEXT character (the escaped one)
986 skip_next = true;
987 continue;
988 }
989 '(' => {
990 paren_depth += 1;
991 has_inner_quantifier = false;
992 group_has_alternation = false;
993 }
994 ')' => {
995 paren_depth -= 1;
996 // SECURITY (FIND-R58-ENG-002): Reject unbalanced closing parens.
997 // Negative paren_depth disables alternation/inner-quantifier
998 // tracking, allowing ReDoS patterns to bypass the safety check.
999 if paren_depth < 0 {
1000 return Err(format!(
1001 "Invalid regex pattern — unbalanced parentheses: '{}'",
1002 &pattern[..pattern.len().min(100)]
1003 ));
1004 }
1005 // Check if the next char is a quantifier
1006 if i + 1 < chars.len() && quantifiers.contains(&chars[i + 1]) {
1007 if has_inner_quantifier {
1008 return Err(format!(
1009 "Regex pattern contains nested quantifiers (potential ReDoS): '{}'",
1010 &pattern[..pattern.len().min(100)]
1011 ));
1012 }
1013 // FIND-R46-007: Alternation with a quantifier on the group
1014 // can cause backtracking if branches overlap.
1015 if group_has_alternation {
1016 return Err(format!(
1017 "Regex pattern contains alternation with outer quantifier (potential ReDoS): '{}'",
1018 &pattern[..pattern.len().min(100)]
1019 ));
1020 }
1021 }
1022 }
1023 '|' if paren_depth > 0 => {
1024 group_has_alternation = true;
1025 }
1026 c if quantifiers.contains(&c) && paren_depth > 0 => {
1027 has_inner_quantifier = true;
1028 }
1029 _ => {}
1030 }
1031 }
1032
1033 // SECURITY (FIND-R58-ENG-004): Reject patterns with unclosed parentheses.
1034 if paren_depth != 0 {
1035 return Err(format!(
1036 "Invalid regex pattern — unbalanced parentheses ({} unclosed): '{}'",
1037 paren_depth,
1038 &pattern[..pattern.len().min(100)]
1039 ));
1040 }
1041
1042 Ok(())
1043 }
1044
1045 /// Compile a regex pattern and test whether it matches the input.
1046 ///
1047 /// Legacy path: compiles the pattern on each call (no caching).
1048 /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1049 ///
1050 /// Validates the pattern for ReDoS safety before compilation (H2).
1051 fn regex_is_match(
1052 &self,
1053 pattern: &str,
1054 input: &str,
1055 policy_id: &str,
1056 ) -> Result<bool, EngineError> {
1057 Self::validate_regex_safety(pattern).map_err(|reason| EngineError::InvalidCondition {
1058 policy_id: policy_id.to_string(),
1059 reason,
1060 })?;
1061 let re = Regex::new(pattern).map_err(|e| EngineError::InvalidCondition {
1062 policy_id: policy_id.to_string(),
1063 reason: format!("Invalid regex pattern '{pattern}': {e}"),
1064 })?;
1065 Ok(re.is_match(input))
1066 }
1067
1068 /// Compile a glob pattern and test whether it matches the input.
1069 ///
1070 /// Legacy path: compiles the pattern on each call (no caching).
1071 /// For zero-overhead evaluation, use `with_policies()` to pre-compile.
1072 fn glob_is_match(
1073 &self,
1074 pattern: &str,
1075 input: &str,
1076 policy_id: &str,
1077 ) -> Result<bool, EngineError> {
1078 // SECURITY: On poisoned read lock, treat as cache miss rather than
1079 // accessing potentially corrupted data. The pattern will be compiled fresh.
1080 {
1081 let cache_result = self.glob_matcher_cache.read();
1082 match cache_result {
1083 Ok(cache) => {
1084 if let Some(matcher) = cache.get(pattern) {
1085 return Ok(matcher.is_match(input));
1086 }
1087 }
1088 Err(e) => {
1089 tracing::warn!(
1090 "glob_matcher_cache read lock poisoned, treating as cache miss: {}",
1091 e
1092 );
1093 // Fall through to compile the pattern fresh
1094 }
1095 }
1096 }
1097
1098 let matcher = Glob::new(pattern)
1099 .map_err(|e| EngineError::InvalidCondition {
1100 policy_id: policy_id.to_string(),
1101 reason: format!("Invalid glob pattern '{pattern}': {e}"),
1102 })?
1103 .compile_matcher();
1104 let is_match = matcher.is_match(input);
1105
1106 // SECURITY: On poisoned write lock, skip cache insertion rather than
1107 // writing into potentially corrupted state. The result is still correct,
1108 // just not cached.
1109 let cache_write = self.glob_matcher_cache.write();
1110 let mut cache = match cache_write {
1111 Ok(guard) => guard,
1112 Err(e) => {
1113 tracing::warn!(
1114 "glob_matcher_cache write lock poisoned, skipping cache insert: {}",
1115 e
1116 );
1117 return Ok(is_match);
1118 }
1119 };
1120 // FIND-R58-ENG-011: Full cache.clear() can cause a thundering herd of
1121 // recompilation on the legacy (non-precompiled) path. For production,
1122 // use with_policies() to pre-compile patterns and avoid this cache entirely.
1123 if cache.len() >= MAX_GLOB_MATCHER_CACHE_ENTRIES {
1124 // SECURITY (P3-ENG-004): Warn on cache eviction so cache thrashing is
1125 // observable in logs. This indicates a policy set with more unique glob
1126 // patterns than MAX_GLOB_MATCHER_CACHE_ENTRIES, which causes repeated
1127 // recompilation and may indicate a misconfiguration or DoS attempt.
1128 tracing::warn!(
1129 capacity = MAX_GLOB_MATCHER_CACHE_ENTRIES,
1130 "glob_matcher_cache capacity exceeded — clearing cache (cache thrashing possible; prefer with_policies() to pre-compile patterns)"
1131 );
1132 cache.clear();
1133 }
1134 cache.insert(pattern.to_string(), matcher);
1135
1136 Ok(is_match)
1137 }
1138
1139 /// Retrieve a parameter value by dot-separated path.
1140 ///
1141 /// Supports both simple keys (`"path"`) and nested paths (`"config.output.path"`).
1142 ///
1143 /// **Resolution order** (Exploit #5 fix): When the path contains dots, the function
1144 /// checks both an exact key match (e.g., `params["config.path"]`) and dot-split
1145 /// traversal (e.g., `params["config"]["path"]`).
1146 ///
1147 /// **Ambiguity handling (fail-closed):** If both interpretations resolve to different
1148 /// values, the function returns `None`. This prevents an attacker from shadowing a
1149 /// nested value with a literal dotted key (or vice versa). The `None` triggers
1150 /// deny behavior through the constraint's `on_missing` handling.
1151 ///
1152 /// When only one interpretation resolves, that value is returned.
1153 /// When both resolve to the same value, that value is returned.
1154 ///
1155 /// IMPROVEMENT_PLAN 4.1: Also supports bracket notation for array access:
1156 /// - `items[0]` — access first element of array "items"
1157 /// - `config.items[0].path` — traverse nested path with array access
1158 /// - `matrix[0][1]` — multi-dimensional array access
1159 pub fn get_param_by_path<'a>(
1160 params: &'a serde_json::Value,
1161 path: &str,
1162 ) -> Option<&'a serde_json::Value> {
1163 let exact_match = params.get(path);
1164
1165 // For non-dotted paths without brackets, exact match is the only interpretation
1166 if !path.contains('.') && !path.contains('[') {
1167 return exact_match;
1168 }
1169
1170 // Try dot-split traversal for nested objects with bracket notation support
1171 let traversal_match = Self::traverse_path(params, path);
1172
1173 match (exact_match, traversal_match) {
1174 // Both exist but differ: ambiguous — fail-closed (return None)
1175 (Some(exact), Some(traversal)) if exact != traversal => None,
1176 // Both exist and are equal: no ambiguity
1177 (Some(exact), Some(_)) => Some(exact),
1178 // Only one interpretation resolves
1179 (Some(exact), None) => Some(exact),
1180 (None, Some(traversal)) => Some(traversal),
1181 (None, None) => None,
1182 }
1183 }
1184
1185 /// Traverse a JSON value using a path with dot notation and bracket notation.
1186 ///
1187 /// Supports:
1188 /// - `foo.bar` — nested object access
1189 /// - `items[0]` — array index access
1190 /// - `foo.items[0].bar` — mixed traversal
1191 /// - `matrix[0][1]` — consecutive array access
1192 fn traverse_path<'a>(
1193 params: &'a serde_json::Value,
1194 path: &str,
1195 ) -> Option<&'a serde_json::Value> {
1196 let mut current = params;
1197
1198 // Split by dots first, then handle bracket notation within each segment
1199 for segment in path.split('.') {
1200 if segment.is_empty() {
1201 continue;
1202 }
1203
1204 // Check for bracket notation: field[index] or just [index]
1205 if let Some(bracket_pos) = segment.find('[') {
1206 // Get the field name before the bracket (may be empty for [0][1] style)
1207 let field_name = &segment[..bracket_pos];
1208
1209 // If there's a field name, traverse into it first
1210 if !field_name.is_empty() {
1211 current = current.get(field_name)?;
1212 }
1213
1214 // Parse all bracket indices in this segment: [0][1][2]...
1215 let mut rest = &segment[bracket_pos..];
1216 while rest.starts_with('[') {
1217 let close_pos = rest.find(']')?;
1218 let index_str = &rest[1..close_pos];
1219 let index: usize = index_str.parse().ok()?;
1220
1221 // Access array element
1222 current = current.get(index)?;
1223
1224 // Move past this bracket pair
1225 rest = &rest[close_pos + 1..];
1226 }
1227
1228 // If there's remaining content after brackets, it's malformed
1229 if !rest.is_empty() {
1230 return None;
1231 }
1232 } else {
1233 // Simple field access
1234 current = current.get(segment)?;
1235 }
1236 }
1237
1238 Some(current)
1239 }
1240
/// Cap on how many string values a recursive parameter scan may collect.
/// Guards against DoS from parameters carrying thousands of nested strings.
const MAX_SCAN_VALUES: usize = 500;

/// Deepest nesting level visited by a recursive parameter scan.
///
/// Typical JSON is 3-5 levels deep, so 32 leaves ample headroom for any
/// reasonable MCP tool parameter structure. Objects or arrays at or beyond
/// this depth are not descended into, so their string values are not
/// collected for constraint evaluation or DLP scanning. The limit protects
/// against stack/memory exhaustion from attacker-crafted deeply nested JSON.
const MAX_JSON_DEPTH: usize = 32;

/// Cap on the work stack used by the iterative JSON traversal.
///
/// SECURITY (FIND-R168-003): bounds transient memory when a flat object or
/// array has very many children. Without the cap, a 1MB JSON with 100K keys
/// at depth 0 would push all 100K items before the depth/results checks
/// could trigger.
const MAX_STACK_SIZE: usize = 10_000;
1261
1262 /// Recursively collect all string values from a JSON structure.
1263 ///
1264 /// Returns a list of `(path, value)` pairs where `path` is a dot-separated
1265 /// description of where the value was found (e.g., `"options.target"`), and
1266 /// a boolean indicating whether results were truncated at [`MAX_SCAN_VALUES`].
1267 /// Uses an iterative approach to avoid stack overflow on deep JSON.
1268 ///
1269 /// Bounded by [`MAX_SCAN_VALUES`] total values and [`MAX_JSON_DEPTH`] nesting depth.
1270 ///
1271 /// SECURITY (R234-ENG-4): Returns truncation flag so callers can fail-closed
1272 /// when the parameter space exceeds scan capacity.
1273 fn collect_all_string_values(params: &serde_json::Value) -> (Vec<(String, &str)>, bool) {
1274 // Pre-allocate for typical parameter sizes; bounded by MAX_SCAN_VALUES
1275 let mut results = Vec::with_capacity(16);
1276 let mut truncated = false;
1277 // Stack: (value, current_path, depth)
1278 let mut stack: Vec<(&serde_json::Value, String, usize)> = vec![(params, String::new(), 0)];
1279
1280 while let Some((val, path, depth)) = stack.pop() {
1281 if results.len() >= Self::MAX_SCAN_VALUES {
1282 truncated = true;
1283 break;
1284 }
1285 match val {
1286 serde_json::Value::String(s) => {
1287 if !path.is_empty() {
1288 results.push((path, s.as_str()));
1289 }
1290 }
1291 serde_json::Value::Object(obj) => {
1292 if depth >= Self::MAX_JSON_DEPTH {
1293 continue;
1294 }
1295 for (key, child) in obj {
1296 // SECURITY (FIND-R168-003): Bound stack inside push loop.
1297 if stack.len() >= Self::MAX_STACK_SIZE {
1298 break;
1299 }
1300 let child_path = if path.is_empty() {
1301 key.clone()
1302 } else {
1303 let mut p = String::with_capacity(path.len() + 1 + key.len());
1304 p.push_str(&path);
1305 p.push('.');
1306 p.push_str(key);
1307 p
1308 };
1309 stack.push((child, child_path, depth + 1));
1310 }
1311 }
1312 serde_json::Value::Array(arr) => {
1313 if depth >= Self::MAX_JSON_DEPTH {
1314 continue;
1315 }
1316 for (i, child) in arr.iter().enumerate() {
1317 if stack.len() >= Self::MAX_STACK_SIZE {
1318 break;
1319 }
1320 let child_path = if path.is_empty() {
1321 format!("[{i}]")
1322 } else {
1323 format!("{path}[{i}]")
1324 };
1325 stack.push((child, child_path, depth + 1));
1326 }
1327 }
1328 _ => {}
1329 }
1330 }
1331
1332 (results, truncated)
1333 }
1334
1335 /// Convert an `on_match` action string into a Verdict.
1336 fn make_constraint_verdict(on_match: &str, reason: &str) -> Result<Verdict, EngineError> {
1337 match on_match {
1338 "deny" => Ok(Verdict::Deny {
1339 reason: reason.to_string(),
1340 }),
1341 "require_approval" => Ok(Verdict::RequireApproval {
1342 reason: reason.to_string(),
1343 }),
1344 "allow" => Ok(Verdict::Allow),
1345 other => Err(EngineError::EvaluationError(format!(
1346 "Unknown on_match action: '{other}'"
1347 ))),
1348 }
1349 }
1350 /// Returns true if any compiled policy has IP rules configured.
1351 ///
1352 /// Used by proxy layers to skip DNS resolution when no policies require it.
1353 pub fn has_ip_rules(&self) -> bool {
1354 self.compiled_policies
1355 .iter()
1356 .any(|cp| cp.compiled_ip_rules.is_some())
1357 }
1358}
1359
1360#[cfg(test)]
1361#[allow(deprecated)] // evaluate_action_with_context: migration tracked in FIND-CREATIVE-005
1362#[path = "engine_tests.rs"]
1363mod tests;