1use std::borrow::Cow;
2
3use bstr::{BStr, BString, ByteSlice, ByteVec};
4
5use crate::{Defaults, MagicSignature, Pattern, SearchMode};
6
7#[derive(thiserror::Error, Debug)]
9#[allow(missing_docs)]
10pub enum Error {
11 #[error("An empty string is not a valid pathspec")]
12 EmptyString,
13 #[error("Found {keyword:?} in signature, which is not a valid keyword")]
14 InvalidKeyword { keyword: BString },
15 #[error("Unimplemented short keyword: {short_keyword:?}")]
16 Unimplemented { short_keyword: char },
17 #[error("Missing ')' at the end of pathspec signature")]
18 MissingClosingParenthesis,
19 #[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")]
20 InvalidAttribute { attribute: BString },
21 #[error("Invalid character in attribute value: {character:?}")]
22 InvalidAttributeValue { character: char },
23 #[error(r"Escape character '\' is not allowed as the last character in an attribute value")]
24 TrailingEscapeCharacter,
25 #[error("Attribute specification cannot be empty")]
26 EmptyAttribute,
27 #[error("Only one attribute specification is allowed in the same pathspec")]
28 MultipleAttributeSpecifications,
29 #[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")]
30 IncompatibleSearchModes,
31}
32
33impl Pattern {
34 pub fn from_bytes(
36 input: &[u8],
37 Defaults {
38 signature,
39 search_mode,
40 literal,
41 }: Defaults,
42 ) -> Result<Self, Error> {
43 if input.is_empty() {
44 return Err(Error::EmptyString);
45 }
46 if literal {
47 return Ok(Self::from_literal(input, signature));
48 }
49 if input.as_bstr() == ":" {
50 return Ok(Pattern {
51 nil: true,
52 ..Default::default()
53 });
54 }
55
56 let mut p = Pattern {
57 signature,
58 search_mode: SearchMode::default(),
59 ..Default::default()
60 };
61
62 let mut cursor = 0;
63 if input.first() == Some(&b':') {
64 cursor += 1;
65 p.signature |= parse_short_keywords(input, &mut cursor)?;
66 if let Some(b'(') = input.get(cursor) {
67 cursor += 1;
68 parse_long_keywords(input, &mut p, &mut cursor)?;
69 }
70 }
71
72 if search_mode != Default::default() && p.search_mode == Default::default() {
73 p.search_mode = search_mode;
74 }
75 let mut path = &input[cursor..];
76 if path.last() == Some(&b'/') {
77 p.signature |= MagicSignature::MUST_BE_DIR;
78 path = &path[..path.len() - 1];
79 }
80 p.path = path.into();
81 Ok(p)
82 }
83
84 pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
87 Pattern {
88 path: input.into(),
89 signature: default_signature,
90 search_mode: SearchMode::Literal,
91 ..Default::default()
92 }
93 }
94}
95
96fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {
97 let unimplemented_chars = b"\"#%&'-',;<=>@_`~";
98
99 let mut signature = MagicSignature::empty();
100 while let Some(&b) = input.get(*cursor) {
101 *cursor += 1;
102 signature |= match b {
103 b'/' => MagicSignature::TOP,
104 b'^' | b'!' => MagicSignature::EXCLUDE,
105 b':' => break,
106 _ if unimplemented_chars.contains(&b) => {
107 return Err(Error::Unimplemented {
108 short_keyword: b.into(),
109 });
110 }
111 _ => {
112 *cursor -= 1;
113 break;
114 }
115 }
116 }
117
118 Ok(signature)
119}
120
121fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> {
122 let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?;
123
124 let input = &input[*cursor..end];
125 *cursor = end + 1;
126
127 if input.is_empty() {
128 return Ok(());
129 }
130
131 split_on_non_escaped_char(input, b',', |keyword| {
132 let attr_prefix = b"attr:";
133 match keyword {
134 b"attr" => {}
135 b"top" => p.signature |= MagicSignature::TOP,
136 b"icase" => p.signature |= MagicSignature::ICASE,
137 b"exclude" => p.signature |= MagicSignature::EXCLUDE,
138 b"literal" => match p.search_mode {
139 SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes),
140 _ => p.search_mode = SearchMode::Literal,
141 },
142 b"glob" => match p.search_mode {
143 SearchMode::Literal => return Err(Error::IncompatibleSearchModes),
144 _ => p.search_mode = SearchMode::PathAwareGlob,
145 },
146 _ if keyword.starts_with(attr_prefix) => {
147 if p.attributes.is_empty() {
148 p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?;
149 } else {
150 return Err(Error::MultipleAttributeSpecifications);
151 }
152 }
153 _ => {
154 return Err(Error::InvalidKeyword {
155 keyword: BString::from(keyword),
156 });
157 }
158 }
159 Ok(())
160 })
161}
162
163fn split_on_non_escaped_char(
164 input: &[u8],
165 split_char: u8,
166 mut f: impl FnMut(&[u8]) -> Result<(), Error>,
167) -> Result<(), Error> {
168 let mut i = 0;
169 let mut last = 0;
170 for window in input.windows(2) {
171 i += 1;
172 if window[0] != b'\\' && window[1] == split_char {
173 let keyword = &input[last..i];
174 f(keyword)?;
175 last = i + 1;
176 }
177 }
178 let last_keyword = &input[last..];
179 f(last_keyword)
180}
181
182fn parse_attributes(input: &[u8]) -> Result<Vec<gix_attributes::Assignment>, Error> {
183 if input.is_empty() {
184 return Err(Error::EmptyAttribute);
185 }
186
187 let unescaped = unescape_attribute_values(input.into())?;
188
189 gix_attributes::parse::Iter::new(unescaped.as_bstr())
190 .map(|res| res.map(gix_attributes::AssignmentRef::to_owned))
191 .collect::<Result<Vec<_>, _>>()
192 .map_err(|e| Error::InvalidAttribute { attribute: e.attribute })
193}
194
195fn unescape_attribute_values(input: &BStr) -> Result<Cow<'_, BStr>, Error> {
196 if !input.contains(&b'=') {
197 return Ok(Cow::Borrowed(input));
198 }
199
200 let mut out: Cow<'_, BStr> = Cow::Borrowed("".into());
201
202 for attr in input.split(|&c| c == b' ') {
203 let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1);
204 let (name, value) = attr.split_at(split_point);
205
206 if value.contains(&b'\\') {
207 let out = out.to_mut();
208 out.push_str(name);
209 out.push_str(unescape_and_check_attr_value(value.into())?);
210 out.push(b' ');
211 } else {
212 check_attribute_value(value.as_bstr())?;
213 match out {
214 Cow::Borrowed(_) => {
215 let end = out.len() + attr.len() + 1;
216 out = Cow::Borrowed(&input[0..end.min(input.len())]);
217 }
218 Cow::Owned(_) => {
219 let out = out.to_mut();
220 out.push_str(name);
221 out.push_str(value);
222 out.push(b' ');
223 }
224 }
225 }
226 }
227
228 Ok(out)
229}
230
231fn unescape_and_check_attr_value(value: &BStr) -> Result<BString, Error> {
232 let mut out = BString::from(Vec::with_capacity(value.len()));
233 let mut bytes = value.iter();
234 while let Some(mut b) = bytes.next().copied() {
235 if b == b'\\' {
236 b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?;
237 }
238
239 out.push(validated_attr_value_byte(b)?);
240 }
241 Ok(out)
242}
243
244fn check_attribute_value(input: &BStr) -> Result<(), Error> {
245 match input.iter().copied().find(|b| !is_valid_attr_value(*b)) {
246 Some(b) => Err(Error::InvalidAttributeValue { character: b as char }),
247 None => Ok(()),
248 }
249}
250
/// Return `true` if `byte` may appear in an attribute value, i.e. if it is an ASCII letter,
/// an ASCII digit, or one of `,`, `-` and `_`.
fn is_valid_attr_value(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b',' | b'-' | b'_'
    )
}
254
255fn validated_attr_value_byte(byte: u8) -> Result<u8, Error> {
256 if is_valid_attr_value(byte) {
257 Ok(byte)
258 } else {
259 Err(Error::InvalidAttributeValue {
260 character: byte as char,
261 })
262 }
263}