Skip to main content

vellaveto_engine/
matcher.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4//
5// Copyright 2026 Paolo Vella
6// SPDX-License-Identifier: MPL-2.0
7
8//! Tool and function pattern matching.
9//!
10//! This module provides pre-compiled pattern matchers for tool/function ID
11//! segments, used to efficiently match policies against actions at evaluation time.
12
13use crate::normalize::normalize_full;
14#[cfg(test)]
15use vellaveto_types::Action;
16
/// Pre-compiled pattern matcher for a single tool or function ID segment.
///
/// SECURITY (FIND-SEM-003, R227-TYP-1): Pattern strings are normalized through
/// `normalize_full()` (NFKC + lowercase + homoglyph mapping) at compile time.
/// This prevents fullwidth Unicode, circled letters, and mathematical variants
/// from bypassing exact-match Deny policies. The evaluation path must also
/// normalize action tool/function names via `normalize_full()` before matching.
///
/// `PartialEq`/`Eq` are derived so that a compiled matcher — variant *and*
/// stored payload — can be compared directly, e.g. to assert in a test that a
/// pattern compiled to exactly the expected normalized string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternMatcher {
    /// Matches every value (pattern `"*"`).
    Any,
    /// Matches only values equal to the stored string
    /// (the pattern is homoglyph-normalized at compile time).
    Exact(String),
    /// Matches values that start with the stored string (pattern `"prefix*"`;
    /// the prefix goes through `normalize_full` at compile time).
    Prefix(String),
    /// Matches values that end with the stored string (pattern `"*suffix"`;
    /// the suffix goes through `normalize_full` at compile time).
    Suffix(String),
}
35
36impl PatternMatcher {
37    pub(crate) fn compile(pattern: &str) -> Self {
38        if pattern == "*" {
39            PatternMatcher::Any
40        } else if let Some(suffix) = pattern.strip_prefix('*') {
41            // SECURITY (R30-ENG-5): Validate that the suffix doesn't contain
42            // another wildcard. Patterns like "*read*" would produce a suffix
43            // match for the literal string "read*", which is almost certainly
44            // not what the admin intended. Fail-closed: treat as Any (matches
45            // all) with a warning — over-matching is safer than under-matching.
46            if suffix.contains('*') {
47                tracing::warn!(
48                    pattern = pattern,
49                    "Unsupported infix/double wildcard pattern — treating as match-all (fail-closed)"
50                );
51                PatternMatcher::Any
52            } else {
53                // SECURITY (FIND-SEM-003, R227-TYP-1): normalize_full at compile time
54                PatternMatcher::Suffix(normalize_full(suffix))
55            }
56        } else if let Some(prefix) = pattern.strip_suffix('*') {
57            if prefix.contains('*') {
58                tracing::warn!(
59                    pattern = pattern,
60                    "Unsupported infix/double wildcard pattern — treating as match-all (fail-closed)"
61                );
62                PatternMatcher::Any
63            } else {
64                // SECURITY (FIND-SEM-003, R227-TYP-1): normalize_full at compile time
65                PatternMatcher::Prefix(normalize_full(prefix))
66            }
67        } else if pattern.contains('*') {
68            // Infix wildcard like "file_*_system" — not supported
69            tracing::warn!(
70                pattern = pattern,
71                "Unsupported infix wildcard pattern — treating as match-all (fail-closed)"
72            );
73            PatternMatcher::Any
74        } else {
75            // SECURITY (FIND-SEM-003, R227-TYP-1): normalize_full at compile time
76            PatternMatcher::Exact(normalize_full(pattern))
77        }
78    }
79
80    pub(crate) fn matches(&self, value: &str) -> bool {
81        match self {
82            PatternMatcher::Any => true,
83            PatternMatcher::Exact(s) => s == value,
84            PatternMatcher::Prefix(p) => value.starts_with(p.as_str()),
85            PatternMatcher::Suffix(s) => value.ends_with(s.as_str()),
86        }
87    }
88
89    /// Match against a pre-normalized value.
90    ///
91    /// SECURITY (FIND-SEM-003): Since patterns are normalized at compile time,
92    /// callers must pass homoglyph-normalized input for consistent matching.
93    /// This method is identical to `matches()` but makes the contract explicit.
94    pub(crate) fn matches_normalized(&self, normalized_value: &str) -> bool {
95        self.matches(normalized_value)
96    }
97}
98
99/// Pre-compiled tool:function matcher derived from policy ID.
100#[derive(Debug, Clone)]
101pub enum CompiledToolMatcher {
102    /// Matches all tools and functions ("*")
103    Universal,
104    /// Matches tool only (no colon in policy ID)
105    ToolOnly(PatternMatcher),
106    /// Matches tool:function with independent matchers
107    ToolAndFunction(PatternMatcher, PatternMatcher),
108}
109
110impl CompiledToolMatcher {
111    pub(crate) fn compile(id: &str) -> Self {
112        if id == "*" {
113            CompiledToolMatcher::Universal
114        } else if let Some((tool_pat, func_remainder)) = id.split_once(':') {
115            // Support qualifier suffixes: "tool:func:qualifier" → match on "tool:func" only
116            let func_pat = func_remainder
117                .split_once(':')
118                .map_or(func_remainder, |(f, _)| f);
119            CompiledToolMatcher::ToolAndFunction(
120                PatternMatcher::compile(tool_pat),
121                PatternMatcher::compile(func_pat),
122            )
123        } else {
124            CompiledToolMatcher::ToolOnly(PatternMatcher::compile(id))
125        }
126    }
127
128    #[cfg(test)]
129    pub(crate) fn matches(&self, action: &Action) -> bool {
130        match self {
131            CompiledToolMatcher::Universal => true,
132            CompiledToolMatcher::ToolOnly(m) => m.matches(&action.tool),
133            CompiledToolMatcher::ToolAndFunction(t, f) => {
134                t.matches(&action.tool) && f.matches(&action.function)
135            }
136        }
137    }
138
139    /// Match against pre-normalized tool and function names.
140    ///
141    /// SECURITY (FIND-SEM-003): Callers must pass homoglyph-normalized tool
142    /// and function names to ensure fullwidth Unicode characters cannot bypass
143    /// exact-match Deny policies.
144    pub(crate) fn matches_normalized(
145        &self,
146        normalized_tool: &str,
147        normalized_function: &str,
148    ) -> bool {
149        match self {
150            CompiledToolMatcher::Universal => true,
151            CompiledToolMatcher::ToolOnly(m) => m.matches_normalized(normalized_tool),
152            CompiledToolMatcher::ToolAndFunction(t, f) => {
153                t.matches_normalized(normalized_tool) && f.matches_normalized(normalized_function)
154            }
155        }
156    }
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use vellaveto_types::Action;

    /// Build an `Action` with the given tool and function names and empty
    /// parameters/targets.
    fn make_action(tool: &str, function: &str) -> Action {
        Action {
            tool: tool.to_owned(),
            function: function.to_owned(),
            parameters: serde_json::json!({}),
            target_paths: Vec::new(),
            target_domains: Vec::new(),
            resolved_ips: Vec::new(),
        }
    }

    #[test]
    fn test_pattern_matcher_any() {
        let any = PatternMatcher::compile("*");
        assert!(matches!(any, PatternMatcher::Any));
        for candidate in &["anything", ""] {
            assert!(any.matches(candidate), "`*` should match {:?}", candidate);
        }
    }

    #[test]
    fn test_pattern_matcher_exact() {
        let exact = PatternMatcher::compile("read_file");
        assert!(matches!(exact, PatternMatcher::Exact(_)));
        assert!(exact.matches("read_file"));
        for candidate in &["read_file2", "read"] {
            assert!(
                !exact.matches(candidate),
                "exact pattern should not match {:?}",
                candidate
            );
        }
    }

    #[test]
    fn test_pattern_matcher_prefix() {
        let prefix = PatternMatcher::compile("read_*");
        assert!(matches!(prefix, PatternMatcher::Prefix(_)));
        for candidate in &["read_file", "read_directory"] {
            assert!(
                prefix.matches(candidate),
                "prefix pattern should match {:?}",
                candidate
            );
        }
        assert!(!prefix.matches("write_file"));
    }

    #[test]
    fn test_pattern_matcher_suffix() {
        let suffix = PatternMatcher::compile("*_file");
        assert!(matches!(suffix, PatternMatcher::Suffix(_)));
        for candidate in &["read_file", "write_file"] {
            assert!(
                suffix.matches(candidate),
                "suffix pattern should match {:?}",
                candidate
            );
        }
        assert!(!suffix.matches("read_directory"));
    }

    #[test]
    fn test_pattern_matcher_infix_treated_as_any() {
        // A wildcard in the middle is unsupported and degrades to match-all
        // (fail-closed).
        let infix = PatternMatcher::compile("read_*_file");
        assert!(matches!(infix, PatternMatcher::Any));
        assert!(infix.matches("anything"));
    }

    #[test]
    fn test_compiled_tool_matcher_universal() {
        let universal = CompiledToolMatcher::compile("*");
        assert!(matches!(universal, CompiledToolMatcher::Universal));
        let action = make_action("any_tool", "any_function");
        assert!(universal.matches(&action));
    }

    #[test]
    fn test_compiled_tool_matcher_tool_only() {
        let tool_only = CompiledToolMatcher::compile("filesystem");
        assert!(matches!(tool_only, CompiledToolMatcher::ToolOnly(_)));

        // (tool, function, expected result)
        let cases = [
            ("filesystem", "read", true),
            ("filesystem", "write", true),
            ("network", "fetch", false),
        ];
        for &(tool, function, expected) in cases.iter() {
            assert_eq!(
                tool_only.matches(&make_action(tool, function)),
                expected,
                "unexpected result for {}:{}",
                tool,
                function
            );
        }
    }

    #[test]
    fn test_compiled_tool_matcher_tool_and_function() {
        let both = CompiledToolMatcher::compile("filesystem:read*");
        assert!(matches!(both, CompiledToolMatcher::ToolAndFunction(_, _)));

        // (tool, function, expected result)
        let cases = [
            ("filesystem", "read", true),
            ("filesystem", "read_file", true),
            ("filesystem", "write", false),
            ("network", "read", false),
        ];
        for &(tool, function, expected) in cases.iter() {
            assert_eq!(
                both.matches(&make_action(tool, function)),
                expected,
                "unexpected result for {}:{}",
                tool,
                function
            );
        }
    }

    #[test]
    fn test_compiled_tool_matcher_with_qualifier() {
        // Everything after the second colon is a qualifier and plays no part
        // in matching.
        let qualified = CompiledToolMatcher::compile("filesystem:read:sensitive");
        assert!(qualified.matches(&make_action("filesystem", "read")));
        assert!(!qualified.matches(&make_action("filesystem", "read:sensitive")));
    }

    // ═══════════════════════════════════════════════════
    // FIND-SEM-003: Homoglyph normalization tests
    // ═══════════════════════════════════════════════════

    #[test]
    fn test_pattern_exact_normalizes_homoglyphs_at_compile() {
        // The fullwidth spelling of "read" is folded to ASCII "read" when the
        // pattern is compiled, so only the ASCII form matches afterwards.
        let fullwidth_read = "\u{FF52}\u{FF45}\u{FF41}\u{FF44}";
        let exact = PatternMatcher::compile(fullwidth_read);
        assert!(exact.matches("read"));
        assert!(!exact.matches(fullwidth_read));
    }

    #[test]
    fn test_pattern_prefix_normalizes_homoglyphs() {
        // Fullwidth "read_" followed by an ASCII wildcard behaves like "read_*".
        let prefix = PatternMatcher::compile("\u{FF52}\u{FF45}\u{FF41}\u{FF44}\u{FF3F}*");
        assert!(matches!(prefix, PatternMatcher::Prefix(_)));
        for candidate in &["read_file", "read_dir"] {
            assert!(
                prefix.matches(candidate),
                "normalized prefix should match {:?}",
                candidate
            );
        }
    }

    #[test]
    fn test_pattern_suffix_normalizes_homoglyphs() {
        // An ASCII wildcard followed by fullwidth "_file" behaves like "*_file".
        let suffix = PatternMatcher::compile("*\u{FF3F}\u{FF46}\u{FF49}\u{FF4C}\u{FF45}");
        assert!(matches!(suffix, PatternMatcher::Suffix(_)));
        for candidate in &["read_file", "write_file"] {
            assert!(
                suffix.matches(candidate),
                "normalized suffix should match {:?}",
                candidate
            );
        }
    }

    #[test]
    fn test_compiled_tool_matcher_normalized_method() {
        let tool_only = CompiledToolMatcher::compile("read_file");
        // Plain ASCII input through matches_normalized.
        assert!(tool_only.matches_normalized("read_file", "any"));
        // Fullwidth input, normalized by the caller with normalize_full first.
        let normalized = normalize_full(
            "\u{FF52}\u{FF45}\u{FF41}\u{FF44}\u{FF3F}\u{FF46}\u{FF49}\u{FF4C}\u{FF45}",
        );
        assert_eq!(normalized, "read_file");
        assert!(tool_only.matches_normalized(&normalized, "any"));
    }

    #[test]
    fn test_cyrillic_homoglyph_tool_name_normalized() {
        // The policy targets "admin"; a name spelled with Cyrillic а (U+0430)
        // must match too once it has been normalized.
        let exact = PatternMatcher::compile("admin");
        let normalized = normalize_full("\u{0430}dmin");
        assert_eq!(normalized, "admin");
        assert!(exact.matches_normalized(&normalized));
    }

    /// R227-TYP-1: circled-letter bypass — Ⓑash is expected to normalize to
    /// "bash" (NFKC to "Bash", then lowercase).
    #[test]
    fn test_r227_circled_letter_normalized() {
        let exact = PatternMatcher::compile("bash");
        // Ⓑ = U+24B7 (circled Latin capital B) → NFKC → B → lowercase → b
        let normalized = normalize_full("\u{24B7}ash");
        assert_eq!(normalized, "bash");
        assert!(exact.matches_normalized(&normalized));
    }
}