/// One parsed pathspec argument: the pattern bytes plus the magic attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathspecElement {
    /// Pattern with the leading magic signature (if any) removed.
    pattern: Vec<u8>,
    /// Set by `exclude`, `!` or `^`.
    exclude: bool,
    /// Set by `icase` or inherited from the parse defaults.
    icase: bool,
    /// Set by `literal` or inherited from the parse defaults.
    literal: bool,
    /// Set by `glob` or inherited from the parse defaults.
    glob: bool,
    /// Set by `top` or `/`. Only recorded here; matching does not consult it.
    top: bool,
    /// Values of every `attr:<value>` word, in order. Only recorded here.
    attrs: Vec<Vec<u8>>,
}

impl PathspecElement {
    /// Parses one pathspec argument.
    ///
    /// Three shapes are recognised:
    /// * `:(word,word,...)pattern` - long form; words are `exclude`, `icase`,
    ///   `literal`, `glob`, `top` and `attr:<value>`. Empty words are ignored.
    /// * `:<sigils>pattern` - short form; `!` and `^` mean exclude, `/` means
    ///   top. The sigil run may be closed by one more `:` which is not part of
    ///   the pattern (so `:!:foo` and `:!foo` are the same element).
    /// * anything else - the whole argument is the pattern.
    ///
    /// `defaults` seeds the `icase`, `literal` and `glob` flags; magic words can
    /// only switch them on.
    ///
    /// # Errors
    ///
    /// * [`PathspecParseError::UnterminatedMagic`] if `:(` has no closing `)`.
    /// * [`PathspecParseError::UnknownMagic`] for an unrecognised long-form word.
    /// * [`PathspecParseError::GlobLiteralConflict`] if both `glob` and `literal`
    ///   end up set.
    pub fn parse(arg: &[u8], defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError> {
        let mut exclude = false;
        let mut icase = defaults.icase;
        let mut literal = defaults.literal;
        let mut glob = defaults.glob;
        let mut top = false;
        let mut attrs: Vec<Vec<u8>> = Vec::new();

        let rest = if let Some(after) = arg.strip_prefix(b":(") {
            let close = after
                .iter()
                .position(|&c| c == b')')
                .ok_or(PathspecParseError::UnterminatedMagic)?;
            for word in split_magic(&after[..close]) {
                match word.as_slice() {
                    b"exclude" => exclude = true,
                    b"icase" => icase = true,
                    b"literal" => literal = true,
                    b"glob" => glob = true,
                    b"top" => top = true,
                    // `:()x` or a doubled comma produces an empty word; skip it.
                    other if other.is_empty() => {}
                    other => match other.strip_prefix(b"attr:") {
                        Some(attr) => attrs.push(attr.to_vec()),
                        None => return Err(PathspecParseError::UnknownMagic(other.to_vec())),
                    },
                }
            }
            &after[close + 1..]
        } else if let Some(after) = arg.strip_prefix(b":") {
            let mut idx = 0;
            while let Some(&sigil) = after.get(idx) {
                match sigil {
                    b'!' | b'^' => exclude = true,
                    b'/' => top = true,
                    _ => break,
                }
                idx += 1;
            }
            let tail = &after[idx..];
            // The short form is optionally terminated by another `:`; that
            // terminator belongs to the signature, not to the pattern.
            tail.strip_prefix(b":").unwrap_or(tail)
        } else {
            arg
        };

        if glob && literal {
            return Err(PathspecParseError::GlobLiteralConflict);
        }

        Ok(PathspecElement {
            pattern: rest.to_vec(),
            exclude,
            icase,
            literal,
            glob,
            top,
            attrs,
        })
    }

    /// Whether this element subtracts paths instead of selecting them.
    pub fn is_exclude(&self) -> bool {
        self.exclude
    }

    /// Whether the `top` magic (`:(top)` or `:/`) was given.
    pub fn is_top(&self) -> bool {
        self.top
    }

    /// The `attr:` values collected from the long-form magic, in order.
    pub fn attrs(&self) -> &[Vec<u8>] {
        &self.attrs
    }

    /// Whether matching ignores ASCII case.
    pub fn is_icase(&self) -> bool {
        self.icase
    }

    /// Whether the `glob` magic is in effect.
    pub fn is_glob(&self) -> bool {
        self.glob
    }

    /// The pattern bytes that follow the magic signature.
    pub fn pattern(&self) -> &[u8] {
        &self.pattern
    }

    /// Bundles the flags that influence matching for `pathspec_item_matches`.
    fn magic(&self) -> PathspecMatchMagic {
        PathspecMatchMagic {
            literal: self.literal,
            glob: self.glob,
            icase: self.icase,
        }
    }

    /// Tests `name` against this element's pattern; the `exclude` flag is not
    /// applied here.
    pub fn matches_path(&self, name: &[u8]) -> bool {
        pathspec_item_matches(&self.pattern, name, self.magic())
    }
}
/// An ordered list of [`PathspecElement`]s that are matched as a group.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Pathspec {
    elements: Vec<PathspecElement>,
}

impl Pathspec {
    /// Parses every argument with [`PathspecElement::parse`], stopping at the
    /// first argument that fails and returning its error.
    pub fn parse<I, S>(args: I, defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<[u8]>,
    {
        let elements = args
            .into_iter()
            .map(|arg| PathspecElement::parse(arg.as_ref(), defaults))
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Self { elements })
    }

    /// True when no elements were given.
    pub fn is_empty(&self) -> bool {
        self.elements.is_empty()
    }

    /// The parsed elements in argument order.
    pub fn elements(&self) -> &[PathspecElement] {
        &self.elements
    }

    /// Decides whether `path` is selected.
    ///
    /// A path hit by any exclude element is rejected. Otherwise it is accepted
    /// if there are no include elements at all (which covers the empty
    /// pathspec), or if at least one include element matches.
    pub fn matches(&self, path: &[u8]) -> bool {
        let vetoed = self
            .elements
            .iter()
            .filter(|element| element.exclude)
            .any(|element| element.matches_path(path));
        if vetoed {
            return false;
        }

        let mut includes = self
            .elements
            .iter()
            .filter(|element| !element.exclude)
            .peekable();
        includes.peek().is_none() || includes.any(|element| element.matches_path(path))
    }
}
/// Splits the text between `:(` and `)` into its comma-separated words.
///
/// Always yields one more word than there are commas, so empty words (and a
/// single empty word for empty input) are preserved.
fn split_magic(body: &[u8]) -> Vec<Vec<u8>> {
    let mut words = Vec::new();
    let mut current = Vec::new();
    for &byte in body {
        if byte == b',' {
            words.push(std::mem::take(&mut current));
        } else {
            current.push(byte);
        }
    }
    words.push(current);
    words
}
/// Reasons a pathspec argument is rejected while parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecParseError {
    /// A `:(` signature was opened but never closed with `)`.
    UnterminatedMagic,
    /// A long-form magic word was not recognised; carries the raw word.
    UnknownMagic(Vec<u8>),
    /// Both `literal` and `glob` ended up enabled.
    GlobLiteralConflict,
}

impl core::fmt::Display for PathspecParseError {
    /// Writes the user-facing message; non-UTF-8 bytes of an unknown magic
    /// word are rendered lossily.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::UnterminatedMagic => f.write_str("Missing ')' at end of pathspec magic"),
            Self::UnknownMagic(word) => {
                f.write_str("Invalid pathspec magic '")?;
                f.write_str(&String::from_utf8_lossy(word))?;
                f.write_str("'")
            }
            Self::GlobLiteralConflict => f.write_str("'literal' and 'glob' are incompatible"),
        }
    }
}

impl std::error::Error for PathspecParseError {}
/// Flags that control how a pattern is compared against a path.
///
/// Used both as the defaults handed to the parser and as the per-element
/// options passed to `pathspec_item_matches`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PathspecMatchMagic {
/// Treat the whole pattern as literal text: no wildcard matching is attempted.
pub literal: bool,
/// Run wildcard matching with `WM_PATHNAME` (has no effect when `literal` is set).
pub glob: bool,
/// Compare ASCII letters case-insensitively.
pub icase: bool,
}
/// True for the bytes that end the literal prefix of a pattern:
/// `*`, `?`, `[` and backslash.
fn is_glob_special(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
/// Length of the leading run of `s` that contains no glob-special byte
/// (the whole length when there is none).
fn simple_length(s: &[u8]) -> usize {
    s.iter()
        .position(|&byte| is_glob_special(byte))
        .unwrap_or(s.len())
}
/// `strncmp`-style equality test: returns `true` when `a` and `b` agree over
/// their first `n` bytes, optionally ignoring ASCII case.
///
/// An operand shorter than `n` ends early, exactly like a NUL terminator
/// would in C, so it can only equal another operand of the same length. In
/// particular a short operand is *not* equal to a longer one that merely
/// starts with it.
fn ps_strncmp(icase: bool, a: &[u8], b: &[u8], n: usize) -> bool {
    // Clamp both sides to the comparison window; slice equality then takes
    // care of the "one side ended early" case via the length check.
    let a = &a[..a.len().min(n)];
    let b = &b[..b.len().min(n)];
    if icase {
        a.eq_ignore_ascii_case(b)
    } else {
        a == b
    }
}
/// Reports whether `path` contains any of the wildcard bytes `*`, `?` or `[`.
///
/// A backslash alone does not count.
pub fn pathspec_is_glob(path: &[u8]) -> bool {
    path.iter().any(|byte| b"*?[".contains(byte))
}
/// Tests a single pattern (`match_`) against a path (`name`).
///
/// The checks run in this order:
/// 1. an empty pattern matches everything;
/// 2. the pattern equals the path, or is a leading directory of it
///    (`src` or `src/` vs `src/lib.rs`), or is the path plus a trailing `/`;
/// 3. otherwise, if the pattern has a wildcard part, its literal prefix must
///    agree with the path and the remainder is handed to `wildmatch`.
///
/// `magic.icase` folds ASCII case in every comparison, `magic.literal`
/// disables step 3, and `magic.glob` adds `WM_PATHNAME` to the wildcard match.
pub fn pathspec_item_matches(match_: &[u8], name: &[u8], magic: PathspecMatchMagic) -> bool {
    let fold = magic.icase;
    let (pat_len, name_len) = (match_.len(), name.len());

    let last = match match_.last() {
        Some(&byte) => byte,
        None => return true,
    };
    let dir_pattern = last == b'/';

    if pat_len <= name_len {
        // Pattern is a byte-prefix of the path: accept an exact hit or a hit
        // that ends on a directory boundary.
        if ps_strncmp(fold, match_, name, pat_len)
            && (pat_len == name_len || dir_pattern || name[pat_len] == b'/')
        {
            return true;
        }
    } else if dir_pattern && name_len + 1 == pat_len && ps_strncmp(fold, match_, name, name_len) {
        // `dir/` also matches the bare entry `dir`.
        return true;
    }

    let literal_prefix = if magic.literal {
        pat_len
    } else {
        simple_length(match_)
    };
    if literal_prefix >= pat_len {
        // No wildcard part to try.
        return false;
    }
    if literal_prefix > 0 && !ps_strncmp(fold, match_, name, literal_prefix) {
        return false;
    }
    if name_len < literal_prefix {
        return false;
    }

    let mut flags = 0;
    if fold {
        flags |= WM_CASEFOLD;
    }
    if magic.glob && !magic.literal {
        flags |= WM_PATHNAME;
    }
    wildmatch(&match_[literal_prefix..], &name[literal_prefix..], flags)
}
/// `wildmatch` flag: compare ASCII letters case-insensitively.
pub const WM_CASEFOLD: u32 = 1;
/// `wildmatch` flag: `?`, a single `*` and bracket expressions do not match `/`.
pub const WM_PATHNAME: u32 = 2;
// Status codes returned by `dowild`; `wildmatch` only reports success for `WM_MATCH`.
/// The pattern matched the whole text.
const WM_MATCH: i32 = 0;
/// Ordinary mismatch at the current position.
const WM_NOMATCH: i32 = 1;
/// Mismatch that ends the search, e.g. the text ran out or the pattern is malformed.
const WM_ABORT_ALL: i32 = -1;
/// A `*` that may not cross `/` gave up; only an enclosing `**` may keep trying.
const WM_ABORT_TO_STARSTAR: i32 = -2;
/// True for bytes in the 7-bit ASCII range (`0x00..=0x7f`).
#[inline]
fn wm_isascii(c: u8) -> bool {
    c.is_ascii()
}
/// True for the ASCII uppercase letters `A..=Z`.
#[inline]
fn wm_isupper(c: u8) -> bool {
    matches!(c, b'A'..=b'Z')
}
/// True for the ASCII lowercase letters `a..=z`.
#[inline]
fn wm_islower(c: u8) -> bool {
    matches!(c, b'a'..=b'z')
}
/// Maps `A..=Z` to `a..=z`; every other byte is returned unchanged.
#[inline]
fn wm_tolower(c: u8) -> u8 {
    match c {
        // Upper and lower case ASCII letters differ only in bit 0x20.
        b'A'..=b'Z' => c | 0x20,
        other => other,
    }
}
/// Maps `a..=z` to `A..=Z`; every other byte is returned unchanged.
#[inline]
fn wm_toupper(c: u8) -> u8 {
    match c {
        // Upper and lower case ASCII letters differ only in bit 0x20.
        b'a'..=b'z' => c & !0x20,
        other => other,
    }
}
/// True for the pattern bytes with special meaning to the matcher:
/// `*`, `?`, `[` and backslash.
#[inline]
fn wm_is_glob_special(c: u8) -> bool {
    c == b'*' || c == b'?' || c == b'[' || c == b'\\'
}
/// Byte-for-byte comparison of a character-class name against a known name.
fn wm_cc_eq(class: &[u8], lit: &[u8]) -> bool {
    class.len() == lit.len() && class.iter().zip(lit).all(|(have, want)| have == want)
}
/// Evaluates a POSIX-style class name (the text between `[:` and `:]`)
/// against the byte `t_ch`.
///
/// Returns `Some(hit)` for a known class and `None` for an unknown name.
/// All classes are ASCII-only. Under `WM_CASEFOLD` the `upper` class also
/// accepts lowercase letters.
fn wm_class_matches(class: &[u8], t_ch: u8, flags: u32) -> Option<bool> {
    let fold = (flags & WM_CASEFOLD) != 0;
    let hit = match class {
        b"alnum" => t_ch.is_ascii_alphanumeric(),
        b"alpha" => t_ch.is_ascii_alphabetic(),
        b"blank" => matches!(t_ch, b' ' | b'\t'),
        b"cntrl" => t_ch.is_ascii_control(),
        b"digit" => t_ch.is_ascii_digit(),
        b"graph" => t_ch.is_ascii_graphic(),
        b"lower" => wm_islower(t_ch),
        b"print" => matches!(t_ch, b' '..=b'~'),
        b"punct" => t_ch.is_ascii_punctuation(),
        b"space" => t_ch.is_ascii_whitespace(),
        b"upper" => wm_isupper(t_ch) || (fold && wm_islower(t_ch)),
        b"xdigit" => t_ch.is_ascii_hexdigit(),
        _ => return None,
    };
    Some(hit)
}
/// Recursive engine behind `wildmatch`: matches `pattern` against `text` and
/// returns one of the `WM_*` status codes.
///
/// * `WM_MATCH` - the pattern consumed the whole text.
/// * `WM_NOMATCH` - an ordinary mismatch; an enclosing `*` may retry at
///   another position.
/// * `WM_ABORT_ALL` - retrying is pointless (text exhausted while non-`*`
///   pattern bytes remain, unterminated bracket expression, unknown class).
/// * `WM_ABORT_TO_STARSTAR` - a `*` that may not cross `/` failed; only an
///   enclosing `**` keeps trying.
///
/// `flags` is a combination of `WM_CASEFOLD` and `WM_PATHNAME`. A zero byte
/// in `text` is treated the same as the end of the text.
fn dowild(pattern: &[u8], text: &[u8], flags: u32) -> i32 {
// `pi` and `ti` are the cursors into the pattern and the text.
let p = pattern;
let mut pi = 0usize;
let mut ti = 0usize;
while pi < p.len() {
let mut p_ch = p[pi];
// Reading past the end of the text yields 0, so 0 doubles as the
// end-of-text marker throughout this function.
let t_ch_raw = if ti < text.len() { text[ti] } else { 0 };
let mut t_ch = t_ch_raw;
// Once the text is exhausted only `*` can still match.
if t_ch == 0 && p_ch != b'*' {
return WM_ABORT_ALL;
}
// Under WM_CASEFOLD both sides are lowered before they are compared.
if (flags & WM_CASEFOLD) != 0 && wm_isupper(t_ch) {
t_ch = wm_tolower(t_ch);
}
if (flags & WM_CASEFOLD) != 0 && wm_isupper(p_ch) {
p_ch = wm_tolower(p_ch);
}
match p_ch {
// `?` consumes exactly one byte, but never `/` under WM_PATHNAME.
b'?' => {
if (flags & WM_PATHNAME) != 0 && t_ch == b'/' {
return WM_NOMATCH;
}
pi += 1;
ti += 1;
continue;
}
b'*' => {
pi += 1;
// Whether this run of stars is allowed to consume `/`.
let match_slash: bool;
if pi < p.len() && p[pi] == b'*' {
// Two or more stars: `prev_p` is the index of the second star, the loop
// skips the rest of the run.
let prev_p = pi; while pi < p.len() && p[pi] == b'*' {
pi += 1;
}
if (flags & WM_PATHNAME) == 0 {
// Without WM_PATHNAME `**` is the same as `*`: both cross slashes.
match_slash = true;
// With WM_PATHNAME `**` crosses directories only when it forms a whole
// path component: preceded by the pattern start or `/`, and followed by
// the pattern end, `/`, or an escaped `/`.
} else if (prev_p < 2 || p[prev_p - 2] == b'/')
&& (pi == p.len()
|| p[pi] == b'/'
|| (p[pi] == b'\\' && pi + 1 < p.len() && p[pi + 1] == b'/'))
{
// First try letting `**/` match nothing at all, so that `a/**/b`
// also matches `a/b`.
if pi < p.len()
&& p[pi] == b'/'
&& dowild(&p[pi + 1..], &text[ti..], flags) == WM_MATCH
{
return WM_MATCH;
}
match_slash = true;
} else {
// `**` glued to other characters behaves like a single `*`.
match_slash = false;
}
} else {
// A single `*` crosses `/` only when WM_PATHNAME is off.
match_slash = (flags & WM_PATHNAME) == 0;
}
if pi == p.len() {
// Trailing star(s) match the rest of the text, unless a slash-blind `*`
// still has a `/` ahead of it.
if !match_slash && text[ti..].contains(&b'/') {
return WM_ABORT_TO_STARSTAR;
}
return WM_MATCH;
} else if !match_slash && p[pi] == b'/' {
// Slash-blind `*` followed by `/`: jump to the next `/` in the text.
// The two increments below consume the slash on both sides.
match text[ti..].iter().position(|&c| c == b'/') {
None => return WM_ABORT_ALL,
Some(off) => {
ti += off; }
}
pi += 1;
ti += 1;
continue;
}
// General case: let the star swallow 0, 1, 2, ... bytes and recurse on the
// remainder of the pattern each time. `cur_t` is the candidate position.
let mut cur_t = ti;
loop {
let mut tc = if cur_t < text.len() { text[cur_t] } else { 0 };
if tc == 0 {
break;
}
// Fast path: when the byte after the star is a plain literal, skip
// directly to its next occurrence in the text (stopping at `/` if the
// star may not cross it).
if !wm_is_glob_special(p[pi]) {
let mut pc = p[pi];
if (flags & WM_CASEFOLD) != 0 && wm_isupper(pc) {
pc = wm_tolower(pc);
}
loop {
tc = if cur_t < text.len() { text[cur_t] } else { 0 };
if tc == 0 {
break;
}
if !(match_slash || tc != b'/') {
break;
}
let mut tcf = tc;
if (flags & WM_CASEFOLD) != 0 && wm_isupper(tcf) {
tcf = wm_tolower(tcf);
}
if tcf == pc {
break;
}
cur_t += 1;
}
// Re-read (and fold) the byte the scan stopped on to see why it stopped.
let tc_cmp = {
let raw = if cur_t < text.len() { text[cur_t] } else { 0 };
if (flags & WM_CASEFOLD) != 0 && wm_isupper(raw) {
wm_tolower(raw)
} else {
raw
}
};
// The literal was not found within reach of this star.
if tc_cmp != pc {
if match_slash {
return WM_ABORT_ALL;
} else {
return WM_ABORT_TO_STARSTAR;
}
}
}
let matched = dowild(&p[pi..], &text[cur_t..], flags);
if matched != WM_NOMATCH {
// Propagate a match or an abort; a slash-crossing star swallows
// WM_ABORT_TO_STARSTAR and keeps trying further positions.
if !match_slash || matched != WM_ABORT_TO_STARSTAR {
return matched;
}
} else {
// A slash-blind star may not step over a `/` to retry.
let cur_raw = if cur_t < text.len() { text[cur_t] } else { 0 };
if !match_slash && cur_raw == b'/' {
return WM_ABORT_TO_STARSTAR;
}
}
cur_t += 1;
}
// Every candidate position was tried without success.
return WM_ABORT_ALL;
}
// Bracket expression: `[abc]`, `[a-z]`, `[!x]` / `[^x]`, `[[:class:]]`.
b'[' => {
pi += 1;
// Past the end of the pattern the current byte reads as 0.
let mut p_ch2 = if pi < p.len() { p[pi] } else { 0 };
// `^` is accepted as an alias for the negation marker `!`.
if p_ch2 == b'^' {
p_ch2 = b'!';
}
let negated = p_ch2 == b'!';
if negated {
pi += 1;
p_ch2 = if pi < p.len() { p[pi] } else { 0 };
}
// `prev_ch` is the previous set member and serves as the lower bound of a
// range; 0 means "no usable previous member".
let mut prev_ch: u8 = 0;
let mut matched = false;
loop {
// Pattern ended before the closing `]`.
if p_ch2 == 0 {
return WM_ABORT_ALL;
}
let mut next_prev: u8 = p_ch2;
let mut skip_class = false;
if p_ch2 == b'\\' {
// Escaped member: the byte after the backslash is taken literally.
pi += 1;
p_ch2 = if pi < p.len() { p[pi] } else { 0 };
if p_ch2 == 0 {
return WM_ABORT_ALL;
}
if t_ch == p_ch2 {
matched = true;
}
next_prev = p_ch2;
// Range `prev_ch-p_ch2`. A `-` that is first in the set, or directly
// before `]`, falls through to the literal case instead.
} else if p_ch2 == b'-' && prev_ch != 0 && pi + 1 < p.len() && p[pi + 1] != b']'
{
pi += 1;
p_ch2 = p[pi];
if p_ch2 == b'\\' {
pi += 1;
p_ch2 = if pi < p.len() { p[pi] } else { 0 };
if p_ch2 == 0 {
return WM_ABORT_ALL;
}
}
if t_ch <= p_ch2 && t_ch >= prev_ch {
matched = true;
} else if (flags & WM_CASEFOLD) != 0 && wm_islower(t_ch) {
// The text byte was lowered above, so also try its uppercase form
// against ranges such as `A-Z`.
let t_up = wm_toupper(t_ch);
if t_up <= p_ch2 && t_up >= prev_ch {
matched = true;
}
}
// A range end cannot start another range.
next_prev = 0;
} else if p_ch2 == b'[' && pi + 1 < p.len() && p[pi + 1] == b':' {
// Possible `[:class:]`: `s` is the first byte of the class name; scan
// forward to the next `]`.
let s = pi + 2;
let mut scan = s;
loop {
if scan >= p.len() {
break;
}
if p[scan] == b']' {
break;
}
scan += 1;
}
pi = scan;
p_ch2 = if pi < p.len() { p[pi] } else { 0 };
if p_ch2 == 0 {
return WM_ABORT_ALL;
}
// No `:` right before the `]`: this was not a class after all. Rewind to
// the `[` and treat it as an ordinary member; the increment at the bottom
// of the loop then resumes at the `:` that follows it.
let class_end = pi; if class_end < s + 1 || p[class_end - 1] != b':' {
pi = s.wrapping_sub(2);
p_ch2 = b'[';
if t_ch == p_ch2 {
matched = true;
}
skip_class = true;
next_prev = p_ch2;
} else {
let class = &p[s..class_end - 1];
match wm_class_matches(class, t_ch, flags) {
Some(true) => matched = true,
Some(false) => {}
// Unknown class name.
None => return WM_ABORT_ALL,
}
next_prev = 0;
}
} else if t_ch == p_ch2 {
// Plain member.
matched = true;
}
// `skip_class` is assigned above but never read; discarding it here only
// silences the unused-variable warning.
let _ = skip_class;
prev_ch = next_prev;
pi += 1;
p_ch2 = if pi < p.len() { p[pi] } else { 0 };
if p_ch2 == b']' {
break;
}
}
// Fail when the set result disagrees with the negation flag; under
// WM_PATHNAME a bracket expression never matches `/`.
if matched == negated || ((flags & WM_PATHNAME) != 0 && t_ch == b'/') {
return WM_NOMATCH;
}
pi += 1;
ti += 1;
continue;
}
// Backslash escape: the next pattern byte is matched literally. A trailing
// backslash yields 0 here, which can never equal the (non-zero) text byte.
b'\\' => {
pi += 1;
let lit = if pi < p.len() { p[pi] } else { 0 };
let lit = if (flags & WM_CASEFOLD) != 0 && wm_isupper(lit) {
wm_tolower(lit)
} else {
lit
};
if t_ch != lit {
return WM_NOMATCH;
}
pi += 1;
ti += 1;
continue;
}
// Ordinary byte: must equal the (possibly case-folded) text byte.
_ => {
if t_ch != p_ch {
return WM_NOMATCH;
}
pi += 1;
ti += 1;
continue;
}
}
}
// Pattern exhausted: it is a match only if the text is exhausted as well.
if ti < text.len() && text[ti] != 0 {
WM_NOMATCH
} else {
WM_MATCH
}
}
pub fn wildmatch(pattern: &[u8], text: &[u8], flags: u32) -> bool {
dowild(pattern, text, flags) == WM_MATCH
}
// Unit tests for pathspec parsing and matching through the public API.
#[cfg(test)]
mod tests {
use super::*;
// Parses `args` with default magic, panicking on a parse error.
fn ps(args: &[&str]) -> Pathspec {
Pathspec::parse(
args.iter().map(|s| s.as_bytes()),
PathspecMatchMagic::default(),
)
.expect("valid pathspec")
}
// An empty pathspec selects every path.
#[test]
fn empty_pathspec_matches_everything() {
let p = Pathspec::default();
assert!(p.is_empty());
assert!(p.matches(b"any/path"));
}
// A plain name matches itself and everything below it as a directory, but
// not a sibling that merely shares the prefix.
#[test]
fn literal_prefix_matches_directory_recursively() {
let p = ps(&["src"]);
assert!(p.matches(b"src"));
assert!(p.matches(b"src/lib.rs"));
assert!(!p.matches(b"srcs/lib.rs"));
assert!(!p.matches(b"other"));
}
// An exclude element removes paths that an include element selected.
#[test]
fn exclude_subtracts_from_includes() {
let p = ps(&["src", ":(exclude)src/gen"]);
assert!(p.matches(b"src/lib.rs"));
assert!(!p.matches(b"src/gen/x.rs"));
}
// `:!` and `:^` are both short forms of `:(exclude)`.
#[test]
fn exclude_shorthand_sigils() {
for spec in [":!foo", ":^foo"] {
let p = ps(&[spec]);
assert!(p.elements()[0].is_exclude());
assert!(p.matches(b"bar"));
assert!(!p.matches(b"foo"));
}
}
// `icase` makes matching ignore ASCII case; the default is case-sensitive.
#[test]
fn icase_magic_folds_case() {
let p = ps(&[":(icase)readme"]);
assert!(p.matches(b"README"));
assert!(p.matches(b"readme"));
let plain = ps(&["readme"]);
assert!(!plain.matches(b"README"));
}
// With `glob`, `*` stays within one path component and `**/` spans directories.
#[test]
fn glob_magic_is_pathname_aware() {
let p = ps(&[":(glob)*.rs"]);
assert!(p.matches(b"lib.rs"));
assert!(!p.matches(b"src/lib.rs"));
let pp = ps(&[":(glob)**/*.rs"]);
assert!(pp.matches(b"src/lib.rs"));
}
// With `literal`, wildcard characters only match themselves.
#[test]
fn literal_magic_disables_wildcards() {
let p = ps(&[":(literal)a*b"]);
assert!(p.matches(b"a*b"));
assert!(!p.matches(b"axxb"));
}
// Both the long form `:(top)` and the short form `:/` set the top flag.
#[test]
fn top_magic_is_parsed() {
let p = ps(&[":(top)src", ":/other"]);
assert!(p.elements()[0].is_top());
assert!(p.elements()[1].is_top());
}
// `attr:` values are stored verbatim and the pattern follows the `)`.
#[test]
fn attr_magic_is_retained() {
let p = ps(&[":(attr:binary)data"]);
assert_eq!(p.elements()[0].attrs(), &[b"binary".to_vec()]);
assert_eq!(p.elements()[0].pattern(), b"data");
}
// Several comma-separated magic words apply together.
#[test]
fn combined_magic_words() {
let p = ps(&[":(exclude,icase)Cargo.lock"]);
let el = &p.elements()[0];
assert!(el.is_exclude());
assert!(!p.matches(b"CARGO.LOCK"));
}
// Parses a single argument that is expected to fail and returns the error.
fn parse_err(arg: &[u8]) -> PathspecParseError {
match Pathspec::parse([arg], PathspecMatchMagic::default()) {
Ok(_) => panic!(
"expected parse error for {:?}",
String::from_utf8_lossy(arg)
),
Err(e) => e,
}
}
// `glob` and `literal` on the same element are rejected.
#[test]
fn glob_literal_conflict_is_error() {
assert_eq!(
parse_err(b":(glob,literal)x"),
PathspecParseError::GlobLiteralConflict
);
}
// An unrecognised long-form word is rejected.
#[test]
fn unknown_magic_is_error() {
assert!(matches!(
parse_err(b":(bogus)x"),
PathspecParseError::UnknownMagic(_)
));
}
// A `:(` without a closing `)` is rejected.
#[test]
fn unterminated_magic_is_error() {
assert_eq!(
parse_err(b":(exclude"),
PathspecParseError::UnterminatedMagic
);
}
// With only exclude elements, every path that is not excluded is selected.
#[test]
fn exclude_only_keeps_unmatched() {
let p = ps(&[":(exclude)target"]);
assert!(p.matches(b"src/lib.rs"));
assert!(!p.matches(b"target/debug"));
}
}