pub const MAX_BRACE_ALTS: usize = 1024;
#[derive(Clone, Debug)]
pub struct Pattern {
src: String,
alts: Vec<Vec<Tok>>,
}
#[derive(Clone, Debug)]
enum Tok {
Star,
AnyOne,
Lit(u8),
Class {
negated: bool,
items: Vec<ClassItem>,
},
}
#[derive(Clone, Debug)]
enum ClassItem {
Byte(u8),
Range(u8, u8),
}
enum E {
Ch(u8),
Dash,
}
impl Pattern {
#[must_use]
pub fn compile(pat: &str) -> Self {
let alts = expand_braces(pat).iter().map(|s| compile_alt(s)).collect();
Self {
src: pat.to_owned(),
alts,
}
}
#[must_use]
pub fn matches(&self, s: &str) -> bool {
let bytes = s.as_bytes();
self.alts.iter().any(|toks| match_one(toks, bytes))
}
#[must_use]
pub fn as_str(&self) -> &str {
&self.src
}
#[must_use]
pub fn alt_count(&self) -> usize {
self.alts.len()
}
}
impl std::fmt::Display for Pattern {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.src)
}
}
impl PartialEq for Pattern {
fn eq(&self, other: &Self) -> bool {
self.src == other.src
}
}
impl Eq for Pattern {}
fn match_one(toks: &[Tok], s: &[u8]) -> bool {
let (mut ti, mut si) = (0usize, 0usize);
let mut star: Option<usize> = None;
let mut star_s = 0usize;
while si < s.len() {
match toks.get(ti) {
Some(Tok::Star) => {
star = Some(ti);
star_s = si;
ti += 1;
}
Some(t) if tok_matches(t, s[si]) => {
ti += 1;
si += 1;
}
_ => {
if let Some(sp) = star {
ti = sp + 1;
star_s += 1;
si = star_s;
} else {
return false;
}
}
}
}
while matches!(toks.get(ti), Some(Tok::Star)) {
ti += 1;
}
ti == toks.len()
}
fn tok_matches(t: &Tok, b: u8) -> bool {
match t {
Tok::Star => false, Tok::AnyOne => true,
Tok::Lit(c) => *c == b,
Tok::Class { negated, items } => {
let inside = items.iter().any(|it| match it {
ClassItem::Byte(x) => *x == b,
ClassItem::Range(lo, hi) => *lo <= b && b <= *hi,
});
inside ^ negated
}
}
}
fn compile_alt(s: &str) -> Vec<Tok> {
let b = s.as_bytes();
let mut toks = Vec::new();
let mut i = 0;
while i < b.len() {
match b[i] {
b'\\' => {
if i + 1 < b.len() {
toks.push(Tok::Lit(b[i + 1]));
i += 2;
} else {
toks.push(Tok::Lit(b'\\')); i += 1;
}
}
b'*' => {
toks.push(Tok::Star);
i += 1;
}
b'?' => {
toks.push(Tok::AnyOne);
i += 1;
}
b'[' => {
if let Some((class, next)) = parse_class(b, i) {
toks.push(class);
i = next;
} else {
toks.push(Tok::Lit(b'[')); i += 1;
}
}
c => {
toks.push(Tok::Lit(c));
i += 1;
}
}
}
toks
}
fn parse_class(b: &[u8], start: usize) -> Option<(Tok, usize)> {
let mut i = start + 1;
let mut negated = false;
if b.get(i) == Some(&b'!') {
negated = true;
i += 1;
}
let mut elems: Vec<E> = Vec::new();
let mut first = true;
loop {
let c = *b.get(i)?; match c {
b'\\' => {
if let Some(&n) = b.get(i + 1) {
elems.push(E::Ch(n));
i += 2;
} else {
elems.push(E::Ch(b'\\'));
i += 1;
}
}
b']' if first => {
elems.push(E::Ch(b']'));
i += 1;
}
b']' => {
i += 1;
break;
}
b'-' => {
elems.push(E::Dash);
i += 1;
}
other => {
elems.push(E::Ch(other));
i += 1;
}
}
first = false;
}
let mut items: Vec<ClassItem> = Vec::new();
let mut k = 0;
while k < elems.len() {
match elems[k] {
E::Ch(lo) => {
if matches!(elems.get(k + 1), Some(E::Dash)) {
if let Some(E::Ch(hi)) = elems.get(k + 2) {
items.push(ClassItem::Range(lo, *hi));
k += 3;
continue;
}
}
items.push(ClassItem::Byte(lo));
k += 1;
}
E::Dash => {
items.push(ClassItem::Byte(b'-'));
k += 1;
}
}
}
Some((Tok::Class { negated, items }, i))
}
fn expand_braces(pat: &str) -> Vec<String> {
expand_braces_capped(pat, MAX_BRACE_ALTS).unwrap_or_else(|| vec![pat.to_owned()])
}
fn expand_braces_capped(pat: &str, cap: usize) -> Option<Vec<String>> {
let b = pat.as_bytes();
let Some(open) = find_unescaped(b, 0, b'{') else {
return Some(vec![pat.to_owned()]);
};
let Some(close) = find_unescaped(b, open + 1, b'}') else {
return Some(vec![pat.to_owned()]);
};
let prefix = &pat[..open];
let alts = &pat[open + 1..close];
let suffix = &pat[close + 1..];
let tails = expand_braces_capped(suffix, cap)?;
let branches = split_unescaped(alts, b',');
match branches.len().checked_mul(tails.len()) {
Some(n) if n <= cap => {}
_ => return None,
}
let mut out = Vec::with_capacity(branches.len() * tails.len());
for alt in branches {
for tail in &tails {
out.push(format!("{prefix}{alt}{tail}"));
}
}
Some(out)
}
fn find_unescaped(b: &[u8], from: usize, needle: u8) -> Option<usize> {
let mut i = from;
while i < b.len() {
match b[i] {
b'\\' => i += 2, c if c == needle => return Some(i),
_ => i += 1,
}
}
None
}
fn split_unescaped(s: &str, sep: u8) -> Vec<&str> {
let b = s.as_bytes();
let mut out = Vec::new();
let (mut start, mut i) = (0usize, 0usize);
while i < b.len() {
match b[i] {
b'\\' => i += 2,
c if c == sep => {
out.push(&s[start..i]);
i += 1;
start = i;
}
_ => i += 1,
}
}
out.push(&s[start..]);
out
}
#[cfg(test)]
mod tests {
use super::{MAX_BRACE_ALTS, Pattern, expand_braces};
use proptest::prelude::*;
fn m(pat: &str, s: &str) -> bool {
Pattern::compile(pat).matches(s)
}
#[test]
fn literal_and_anchors() {
assert!(m("abc", "abc"));
assert!(!m("abc", "abd"));
assert!(!m("abc", "abcd"));
assert!(!m("abc", "ab"));
}
#[test]
fn star_spans_anything_including_slash() {
assert!(m("*", ""));
assert!(m("*", "anything/at/all"));
assert!(m("a*c", "ac"));
assert!(m("a*c", "abbbc"));
assert!(m("*/tests/*", "src/pkg/tests/foo.rs"));
assert!(!m("*/tests/*", "src/pkg/test/foo.rs"));
}
#[test]
fn question_matches_single_byte() {
assert!(m("a?c", "abc"));
assert!(!m("a?c", "ac"));
assert!(!m("a?c", "abbc"));
assert!(m("??", "é"));
assert!(!m("?", "é"));
}
#[test]
fn collapsed_stars_do_not_blow_up() {
let pat = "a*a*a*a*a*a*a*a*b";
let s = "a".repeat(64);
assert!(!m(pat, &s));
assert!(m("***", "anything"));
}
#[test]
fn char_classes() {
assert!(m("[a-z]bc", "xbc"));
assert!(!m("[a-z]bc", "Xbc"));
assert!(m("test_[0-9]*", "test_42x"));
assert!(m("[!0-9]", "a"));
assert!(!m("[!0-9]", "7"));
assert!(m("[]a]", "]"));
assert!(m("[]a]", "a"));
assert!(m("[a-]", "a"));
assert!(m("[a-]", "-"));
assert!(m("[-a]", "-"));
}
#[test]
fn escaping_literals() {
assert!(m(r"\*", "*"));
assert!(!m(r"\*", "anything"));
assert!(m(r"\[lit\]", "[lit]"));
assert!(m(r"a\?b", "a?b"));
assert!(!m(r"a\?b", "axb"));
assert_eq!(Pattern::compile(r"\{a,b\}").alt_count(), 1);
assert!(m(r"\{a,b\}", "{a,b}"));
}
#[test]
fn brace_expansion_covers_all_conventions() {
let e = expand_braces("*/{test,tests,__tests__}/*");
assert_eq!(e.len(), 3);
assert!(e.contains(&"*/test/*".to_owned()));
assert!(e.contains(&"*/tests/*".to_owned()));
assert!(e.contains(&"*/__tests__/*".to_owned()));
}
#[test]
fn brace_alternation_matches() {
let p = Pattern::compile("*/{test,tests,__tests__}/*");
assert!(p.matches("packages/compiler-cli/test/compliance/foo.ts"));
assert!(p.matches("packages/svelte/tests/css/test.ts"));
assert!(p.matches("packages/next/src/__tests__/foo.test.ts"));
assert!(!p.matches("src/components/Button.ts"));
}
#[test]
fn file_extension_alternation() {
let p = Pattern::compile("*.{test,spec}.*");
assert!(p.matches("src/foo.test.ts"));
assert!(p.matches("src/foo.spec.ts"));
assert!(!p.matches("src/foo.ts"));
}
#[test]
fn empty_alternation_branch_is_legal() {
let e = expand_braces("*/{,s}post*");
assert!(e.contains(&"*/post*".to_owned()));
assert!(e.contains(&"*/spost*".to_owned()));
}
#[test]
fn two_groups_compose() {
let e = expand_braces("{a,b}{c,d}");
assert_eq!(e.len(), 4);
for want in ["ac", "ad", "bc", "bd"] {
assert!(e.contains(&want.to_owned()), "missing {want}");
}
}
#[test]
fn no_braces_is_one_alternative() {
assert_eq!(Pattern::compile("*tests/*").alt_count(), 1);
assert_eq!(expand_braces("*tests/*"), vec!["*tests/*".to_owned()]);
}
#[test]
fn unmatched_brace_is_literal() {
assert!(m("a{b", "a{b"));
assert_eq!(Pattern::compile("a{b").alt_count(), 1);
}
#[test]
fn brace_bomb_degrades_to_literal() {
let bomb = "{a,b}".repeat(40); let p = Pattern::compile(&bomb);
assert_eq!(p.alt_count(), 1);
assert!(p.matches(&bomb)); }
#[test]
fn cap_boundary() {
let at = "{a,b}".repeat(10);
assert_eq!(Pattern::compile(&at).alt_count(), MAX_BRACE_ALTS);
let over = "{a,b}".repeat(11);
assert_eq!(Pattern::compile(&over).alt_count(), 1);
}
proptest! {
#[test]
fn compile_is_total_and_bounded(pat in ".*") {
let p = Pattern::compile(&pat);
prop_assert!(p.alt_count() >= 1);
prop_assert!(p.alt_count() <= MAX_BRACE_ALTS);
}
#[test]
fn matching_never_panics(pat in ".*", text in ".*") {
let _ = Pattern::compile(&pat).matches(&text);
}
#[test]
fn literal_is_self_anchored(lit in "[a-z0-9/._-]{0,32}") {
let p = Pattern::compile(&lit);
let longer = format!("{lit}x");
prop_assert!(p.matches(&lit));
prop_assert!(!p.matches(&longer));
}
#[test]
fn brace_strings_stay_bounded(n in 0usize..64) {
let p = Pattern::compile(&"{a,b}".repeat(n));
prop_assert!(p.alt_count() <= MAX_BRACE_ALTS);
}
}
}