1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
//! TrustForge glob matching: the pattern language used by capability
//! target sets, negative-capability targets, and policy `target_patterns`.
//!
//! Semantics (canonical since B8):
//! - `*` matches any run (possibly empty) of characters **except `/`**
//! - `**` matches any run (possibly empty) of characters, including `/`
//! - every other character, `?` included, matches itself literally
//!
//! Previously each call site converted the glob to a regex and matched
//! with the `regex` crate; the three private copies had drifted (two
//! still byte-iterated, corrupting non-ASCII patterns, and passed `?`
//! through as a regex quantifier). This module is the single
//! implementation. It matches directly — no regex, no compilation step —
//! in O(pattern × value) worst case with plain DP, so untrusted patterns
//! cannot trigger pathological backtracking.
/// Match `value` against a TrustForge glob `pattern`.
pub fn glob_match(pattern: &str, value: &str) -> bool {
enum Tok {
/// `**` — any run, `/` included.
Any,
/// `*` — any run without `/`.
AnySegment,
Lit(char),
}
let mut toks = Vec::with_capacity(pattern.len());
let mut chars = pattern.chars().peekable();
while let Some(c) = chars.next() {
if c == '*' {
if chars.peek() == Some(&'*') {
chars.next();
toks.push(Tok::Any);
} else {
toks.push(Tok::AnySegment);
}
} else {
toks.push(Tok::Lit(c));
}
}
let vals: Vec<char> = value.chars().collect();
// dp[j] = "the tokens consumed so far can match vals[..j]".
let mut dp = vec![false; vals.len() + 1];
dp[0] = true;
for tok in &toks {
let mut next = vec![false; vals.len() + 1];
match tok {
Tok::Lit(l) => {
for j in 0..vals.len() {
next[j + 1] = dp[j] && vals[j] == *l;
}
}
Tok::Any => {
let mut reachable = false;
for j in 0..=vals.len() {
reachable = reachable || dp[j];
next[j] = reachable;
}
}
Tok::AnySegment => {
// Reachability propagates rightward but stops at `/`.
let mut reachable = false;
for j in 0..=vals.len() {
reachable = reachable || dp[j];
next[j] = reachable;
if j < vals.len() && vals[j] == '/' {
reachable = false;
}
}
}
}
dp = next;
}
dp[vals.len()]
}
#[cfg(test)]
mod tests {
use super::glob_match;
#[test]
fn literals() {
assert!(glob_match("files.read", "files.read"));
assert!(!glob_match("files.read", "files.write"));
assert!(!glob_match("files.read", "files.read2"));
assert!(!glob_match("files.read2", "files.read"));
assert!(glob_match("", ""));
assert!(!glob_match("", "a"));
}
#[test]
fn single_star_stops_at_slash() {
assert!(glob_match("repo/*", "repo/main"));
assert!(!glob_match("repo/*", "repo/main/file"));
assert!(glob_match("repo/*/file", "repo/main/file"));
assert!(glob_match("*.rs", "lib.rs"));
assert!(!glob_match("*.rs", "src/lib.rs"));
assert!(glob_match("a*", "a"));
}
#[test]
fn double_star_crosses_slash() {
assert!(glob_match("repo/**", "repo/main/deep/file"));
assert!(glob_match("repo/**", "repo/"));
assert!(glob_match("**/file", "repo/main/file"));
assert!(glob_match("**", ""));
assert!(glob_match("**", "anything/at/all"));
}
#[test]
fn mixed_stars_requiring_full_backtracking() {
// A single-backtrack matcher gets this wrong: `**` must grow
// even though a later `*` hit the mismatch.
assert!(glob_match("**/a*b", "x/a/ab"));
// `/` is literal here: `**` does not swallow the separator.
assert!(!glob_match("**/a*b", "aXb"));
assert!(!glob_match("**/a*b", "x/a/a"));
assert!(glob_match("*a*a*", "aaa"));
assert!(!glob_match("*a*a*", "a/a"));
assert!(glob_match("**a**a**", "a/a"));
}
#[test]
fn question_mark_is_literal() {
// B8: `?` is an ordinary character, not a wildcard.
assert!(glob_match("ok?", "ok?"));
assert!(!glob_match("ok?", "ok"));
assert!(!glob_match("ok?", "okX"));
}
#[test]
fn regex_meta_is_literal() {
assert!(glob_match("a.b+c(d)", "a.b+c(d)"));
assert!(!glob_match("a.b", "axb"));
assert!(glob_match("[env]", "[env]"));
assert!(glob_match("^start$", "^start$"));
}
#[test]
fn non_ascii_patterns() {
// The old byte-based converters corrupted multi-byte chars.
assert!(glob_match("café/*", "café/menu"));
assert!(!glob_match("café/*", "cafe/menu"));
assert!(glob_match("*é", "résumé"));
}
}