gix_pathspec/
parse.rs

1use std::borrow::Cow;
2
3use bstr::{BStr, BString, ByteSlice, ByteVec};
4
5use crate::{Defaults, MagicSignature, Pattern, SearchMode};
6
7/// The error returned by [parse()][crate::parse()].
8#[derive(thiserror::Error, Debug)]
9#[allow(missing_docs)]
10pub enum Error {
11    #[error("An empty string is not a valid pathspec")]
12    EmptyString,
13    #[error("Found {keyword:?} in signature, which is not a valid keyword")]
14    InvalidKeyword { keyword: BString },
15    #[error("Unimplemented short keyword: {short_keyword:?}")]
16    Unimplemented { short_keyword: char },
17    #[error("Missing ')' at the end of pathspec signature")]
18    MissingClosingParenthesis,
19    #[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")]
20    InvalidAttribute { attribute: BString },
21    #[error("Invalid character in attribute value: {character:?}")]
22    InvalidAttributeValue { character: char },
23    #[error(r"Escape character '\' is not allowed as the last character in an attribute value")]
24    TrailingEscapeCharacter,
25    #[error("Attribute specification cannot be empty")]
26    EmptyAttribute,
27    #[error("Only one attribute specification is allowed in the same pathspec")]
28    MultipleAttributeSpecifications,
29    #[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")]
30    IncompatibleSearchModes,
31}
32
33impl Pattern {
34    /// Try to parse a path-spec pattern from the given `input` bytes.
35    pub fn from_bytes(
36        input: &[u8],
37        Defaults {
38            signature,
39            search_mode,
40            literal,
41        }: Defaults,
42    ) -> Result<Self, Error> {
43        if input.is_empty() {
44            return Err(Error::EmptyString);
45        }
46        if literal {
47            return Ok(Self::from_literal(input, signature));
48        }
49        if input.as_bstr() == ":" {
50            return Ok(Pattern {
51                nil: true,
52                ..Default::default()
53            });
54        }
55
56        let mut p = Pattern {
57            signature,
58            search_mode: SearchMode::default(),
59            ..Default::default()
60        };
61
62        let mut cursor = 0;
63        if input.first() == Some(&b':') {
64            cursor += 1;
65            p.signature |= parse_short_keywords(input, &mut cursor)?;
66            if let Some(b'(') = input.get(cursor) {
67                cursor += 1;
68                parse_long_keywords(input, &mut p, &mut cursor)?;
69            }
70        }
71
72        if search_mode != Default::default() && p.search_mode == Default::default() {
73            p.search_mode = search_mode;
74        }
75        let mut path = &input[cursor..];
76        if path.last() == Some(&b'/') {
77            p.signature |= MagicSignature::MUST_BE_DIR;
78            path = &path[..path.len() - 1];
79        }
80        p.path = path.into();
81        Ok(p)
82    }
83
84    /// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and
85    /// use `default_signature` as magic signature.
86    pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
87        Pattern {
88            path: input.into(),
89            signature: default_signature,
90            search_mode: SearchMode::Literal,
91            ..Default::default()
92        }
93    }
94}
95
96fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {
97    let unimplemented_chars = b"\"#%&'-',;<=>@_`~";
98
99    let mut signature = MagicSignature::empty();
100    while let Some(&b) = input.get(*cursor) {
101        *cursor += 1;
102        signature |= match b {
103            b'/' => MagicSignature::TOP,
104            b'^' | b'!' => MagicSignature::EXCLUDE,
105            b':' => break,
106            _ if unimplemented_chars.contains(&b) => {
107                return Err(Error::Unimplemented {
108                    short_keyword: b.into(),
109                });
110            }
111            _ => {
112                *cursor -= 1;
113                break;
114            }
115        }
116    }
117
118    Ok(signature)
119}
120
121fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> {
122    let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?;
123
124    let input = &input[*cursor..end];
125    *cursor = end + 1;
126
127    if input.is_empty() {
128        return Ok(());
129    }
130
131    split_on_non_escaped_char(input, b',', |keyword| {
132        let attr_prefix = b"attr:";
133        match keyword {
134            b"attr" => {}
135            b"top" => p.signature |= MagicSignature::TOP,
136            b"icase" => p.signature |= MagicSignature::ICASE,
137            b"exclude" => p.signature |= MagicSignature::EXCLUDE,
138            b"literal" => match p.search_mode {
139                SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes),
140                _ => p.search_mode = SearchMode::Literal,
141            },
142            b"glob" => match p.search_mode {
143                SearchMode::Literal => return Err(Error::IncompatibleSearchModes),
144                _ => p.search_mode = SearchMode::PathAwareGlob,
145            },
146            _ if keyword.starts_with(attr_prefix) => {
147                if p.attributes.is_empty() {
148                    p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?;
149                } else {
150                    return Err(Error::MultipleAttributeSpecifications);
151                }
152            }
153            _ => {
154                return Err(Error::InvalidKeyword {
155                    keyword: BString::from(keyword),
156                });
157            }
158        }
159        Ok(())
160    })
161}
162
163fn split_on_non_escaped_char(
164    input: &[u8],
165    split_char: u8,
166    mut f: impl FnMut(&[u8]) -> Result<(), Error>,
167) -> Result<(), Error> {
168    let mut i = 0;
169    let mut last = 0;
170    for window in input.windows(2) {
171        i += 1;
172        if window[0] != b'\\' && window[1] == split_char {
173            let keyword = &input[last..i];
174            f(keyword)?;
175            last = i + 1;
176        }
177    }
178    let last_keyword = &input[last..];
179    f(last_keyword)
180}
181
182fn parse_attributes(input: &[u8]) -> Result<Vec<gix_attributes::Assignment>, Error> {
183    if input.is_empty() {
184        return Err(Error::EmptyAttribute);
185    }
186
187    let unescaped = unescape_attribute_values(input.into())?;
188
189    gix_attributes::parse::Iter::new(unescaped.as_bstr())
190        .map(|res| res.map(gix_attributes::AssignmentRef::to_owned))
191        .collect::<Result<Vec<_>, _>>()
192        .map_err(|e| Error::InvalidAttribute { attribute: e.attribute })
193}
194
195fn unescape_attribute_values(input: &BStr) -> Result<Cow<'_, BStr>, Error> {
196    if !input.contains(&b'=') {
197        return Ok(Cow::Borrowed(input));
198    }
199
200    let mut out: Cow<'_, BStr> = Cow::Borrowed("".into());
201
202    for attr in input.split(|&c| c == b' ') {
203        let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1);
204        let (name, value) = attr.split_at(split_point);
205
206        if value.contains(&b'\\') {
207            let out = out.to_mut();
208            out.push_str(name);
209            out.push_str(unescape_and_check_attr_value(value.into())?);
210            out.push(b' ');
211        } else {
212            check_attribute_value(value.as_bstr())?;
213            match out {
214                Cow::Borrowed(_) => {
215                    let end = out.len() + attr.len() + 1;
216                    out = Cow::Borrowed(&input[0..end.min(input.len())]);
217                }
218                Cow::Owned(_) => {
219                    let out = out.to_mut();
220                    out.push_str(name);
221                    out.push_str(value);
222                    out.push(b' ');
223                }
224            }
225        }
226    }
227
228    Ok(out)
229}
230
231fn unescape_and_check_attr_value(value: &BStr) -> Result<BString, Error> {
232    let mut out = BString::from(Vec::with_capacity(value.len()));
233    let mut bytes = value.iter();
234    while let Some(mut b) = bytes.next().copied() {
235        if b == b'\\' {
236            b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?;
237        }
238
239        out.push(validated_attr_value_byte(b)?);
240    }
241    Ok(out)
242}
243
244fn check_attribute_value(input: &BStr) -> Result<(), Error> {
245    match input.iter().copied().find(|b| !is_valid_attr_value(*b)) {
246        Some(b) => Err(Error::InvalidAttributeValue { character: b as char }),
247        None => Ok(()),
248    }
249}
250
/// Return `true` if `byte` may appear in an attribute value: ASCII letters and digits as well as
/// `,`, `-` and `_`.
fn is_valid_attr_value(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b',' | b'-' | b'_')
}
254
255fn validated_attr_value_byte(byte: u8) -> Result<u8, Error> {
256    if is_valid_attr_value(byte) {
257        Ok(byte)
258    } else {
259        Err(Error::InvalidAttributeValue {
260            character: byte as char,
261        })
262    }
263}