Skip to main content

ta_changeset/
uri_pattern.rs

1// uri_pattern.rs — URI-aware pattern matching for selective approval.
2//
3// Matches artifact resource_uri values against user-provided patterns.
4// Safety-first: patterns are scoped by URI scheme so filesystem globs
5// can never accidentally match email or database URIs.
6//
7// Examples:
8//   "src/**"                → matches "fs://workspace/src/main.rs" (auto-prefixed)
9//   "fs://workspace/src/**" → matches "fs://workspace/src/main.rs" (explicit)
10//   "gmail://**"            → matches "gmail://inbox/msg-123" (email scheme)
11//   "src/**"                → does NOT match "gmail://inbox/src/draft" (scheme mismatch)
12
13use glob::{MatchOptions, Pattern};
14
/// Scheme and authority (`fs://workspace/`) prepended to patterns that do not
/// name a scheme themselves, so bare paths are always workspace-relative.
const FS_PREFIX: &str = "fs://workspace/";
17
18/// Match a pattern against a resource URI with scheme-awareness.
19///
20/// Rules:
21/// 1. If pattern has a scheme (`://`), match the full URI as a glob.
22/// 2. If pattern has no scheme (bare path like `src/**`), auto-prefix with
23///    `fs://workspace/` and only match `fs://` URIs.
24/// 3. Scheme mismatch = no match (safety invariant).
25/// 4. Invalid glob patterns never match (fail-closed).
26pub fn matches_uri(pattern: &str, uri: &str) -> bool {
27    if pattern.contains("://") {
28        // Explicit scheme — extract and compare schemes before globbing.
29        let pattern_scheme = scheme_of(pattern);
30        let uri_scheme = scheme_of(uri);
31        if pattern_scheme != uri_scheme {
32            return false;
33        }
34        glob_match(pattern, uri)
35    } else {
36        // Bare pattern — only match fs:// URIs.
37        if !uri.starts_with(FS_PREFIX) {
38            return false;
39        }
40        let full_pattern = format!("{}{}", FS_PREFIX, pattern);
41        glob_match(&full_pattern, uri)
42    }
43}
44
/// Extract the scheme portion of a URI: everything before the first `://`.
///
/// Returns the empty string when `uri` contains no `://` at all, so that a
/// scheme-less input never compares equal to a real (non-empty) scheme.
/// The returned slice borrows from `uri`.
fn scheme_of(uri: &str) -> &str {
    // `find` returns the byte offset of the first separator, which is always
    // a valid char boundary, so slicing up to it cannot panic.
    uri.find("://").map_or("", |end| &uri[..end])
}
49
50/// Glob-match a pattern against a target string. Fail-closed on invalid patterns.
51fn glob_match(pattern: &str, target: &str) -> bool {
52    let opts = MatchOptions {
53        require_literal_separator: true,
54        ..Default::default()
55    };
56    match Pattern::new(pattern) {
57        Ok(p) => p.matches_with(target, opts),
58        Err(_) => false,
59    }
60}
61
62/// Resolve a user-provided pattern into its full URI form.
63/// Useful for displaying what a pattern actually matches.
64pub fn resolve_pattern(pattern: &str) -> String {
65    if pattern.contains("://") {
66        pattern.to_string()
67    } else {
68        format!("{}{}", FS_PREFIX, pattern)
69    }
70}
71
72/// Filter a list of URIs by a set of patterns. Returns matching URIs.
73pub fn filter_uris<'a>(patterns: &[&str], uris: &[&'a str]) -> Vec<&'a str> {
74    uris.iter()
75        .filter(|uri| patterns.iter().any(|pat| matches_uri(pat, uri)))
76        .copied()
77        .collect()
78}
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every `(pattern, uri)` pair is accepted by `matches_uri`.
    fn expect_match(cases: &[(&str, &str)]) {
        for &(pattern, uri) in cases {
            assert!(
                matches_uri(pattern, uri),
                "{:?} should match {:?}",
                pattern,
                uri
            );
        }
    }

    /// Assert that every `(pattern, uri)` pair is rejected by `matches_uri`.
    fn expect_no_match(cases: &[(&str, &str)]) {
        for &(pattern, uri) in cases {
            assert!(
                !matches_uri(pattern, uri),
                "{:?} should NOT match {:?}",
                pattern,
                uri
            );
        }
    }

    // ── Bare patterns (auto-prefix fs://workspace/) ──────────────

    #[test]
    fn bare_pattern_matches_fs_uri() {
        expect_match(&[
            ("src/**", "fs://workspace/src/main.rs"),
            ("src/*.rs", "fs://workspace/src/lib.rs"),
            ("README.md", "fs://workspace/README.md"),
        ]);
    }

    #[test]
    fn bare_pattern_does_not_match_other_schemes() {
        // Safety: a bare pattern must never reach non-fs schemes.
        expect_no_match(&[
            ("src/**", "gmail://inbox/src/draft"),
            ("src/**", "drive://docs/src/readme"),
            ("*.rs", "db://tables/schema.rs"),
        ]);
    }

    #[test]
    fn bare_pattern_no_match_outside_scope() {
        expect_no_match(&[
            ("src/**", "fs://workspace/tests/test.rs"),
            ("src/*.rs", "fs://workspace/src/sub/deep.rs"),
        ]);
    }

    // ── Explicit scheme patterns ─────────────────────────────────

    #[test]
    fn explicit_fs_pattern_matches() {
        expect_match(&[
            ("fs://workspace/src/**", "fs://workspace/src/main.rs"),
            ("fs://workspace/**", "fs://workspace/Cargo.toml"),
        ]);
    }

    #[test]
    fn explicit_gmail_pattern_matches() {
        expect_match(&[
            // Single * matches one path segment (require_literal_separator).
            ("gmail://inbox/*", "gmail://inbox/msg-456"),
            // ** matches across path separators.
            ("gmail://**", "gmail://inbox/msg-123"),
        ]);
        // Single * does NOT cross path separators.
        expect_no_match(&[("gmail://*", "gmail://inbox/msg-123")]);
    }

    #[test]
    fn scheme_mismatch_never_matches() {
        expect_no_match(&[
            // fs:// pattern vs gmail:// URI.
            ("fs://workspace/**", "gmail://inbox/msg-123"),
            // gmail:// pattern vs fs:// URI.
            ("gmail://*", "fs://workspace/src/main.rs"),
        ]);
    }

    // ── Edge cases ───────────────────────────────────────────────

    #[test]
    fn exact_path_match() {
        expect_match(&[("src/main.rs", "fs://workspace/src/main.rs")]);
        expect_no_match(&[("src/main.rs", "fs://workspace/src/lib.rs")]);
    }

    #[test]
    fn double_star_matches_deep_paths() {
        expect_match(&[("src/**", "fs://workspace/src/deeply/nested/file.rs")]);
    }

    #[test]
    fn invalid_glob_pattern_never_matches() {
        // Unclosed bracket — should fail-closed.
        expect_no_match(&[("[invalid", "fs://workspace/src/main.rs")]);
    }

    #[test]
    fn empty_pattern_does_not_match() {
        expect_no_match(&[("", "fs://workspace/src/main.rs")]);
    }

    // ── Helpers ──────────────────────────────────────────────────

    #[test]
    fn resolve_bare_pattern() {
        let resolved = resolve_pattern("src/**");
        assert_eq!(resolved, "fs://workspace/src/**");
    }

    #[test]
    fn resolve_explicit_pattern_unchanged() {
        let resolved = resolve_pattern("gmail://*");
        assert_eq!(resolved, "gmail://*");
    }

    #[test]
    fn filter_uris_selects_matching() {
        let uris = [
            "fs://workspace/src/main.rs",
            "fs://workspace/src/lib.rs",
            "fs://workspace/tests/test.rs",
            "gmail://inbox/msg-1",
        ];
        let matched = filter_uris(&["src/**"], &uris);
        assert_eq!(matched.len(), 2);
        for expected in &["fs://workspace/src/main.rs", "fs://workspace/src/lib.rs"] {
            assert!(matched.contains(expected));
        }
    }

    #[test]
    fn filter_uris_multiple_patterns() {
        let uris = [
            "fs://workspace/src/main.rs",
            "fs://workspace/tests/test.rs",
            "gmail://inbox/msg-1",
        ];
        let matched = filter_uris(&["src/**", "gmail://**"], &uris);
        assert_eq!(matched.len(), 2);
        for expected in &["fs://workspace/src/main.rs", "gmail://inbox/msg-1"] {
            assert!(matched.contains(expected));
        }
    }
}