Skip to main content

picomatch_rs/
scan.rs

1use serde::{Deserialize, Serialize};
2
3use crate::constants::{
4    CHAR_ASTERISK, CHAR_AT, CHAR_BACKWARD_SLASH, CHAR_COMMA, CHAR_DOT, CHAR_EXCLAMATION_MARK,
5    CHAR_FORWARD_SLASH, CHAR_LEFT_CURLY_BRACE, CHAR_LEFT_PARENTHESES, CHAR_LEFT_SQUARE_BRACKET,
6    CHAR_PLUS, CHAR_QUESTION_MARK, CHAR_RIGHT_CURLY_BRACE, CHAR_RIGHT_PARENTHESES,
7    CHAR_RIGHT_SQUARE_BRACKET,
8};
9use crate::utils::{is_path_separator, remove_backslashes};
10
11#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
12#[serde(default, rename_all = "camelCase")]
13pub struct ScanOptions {
14    pub parts: bool,
15    pub tokens: bool,
16    pub scan_to_end: bool,
17    pub noext: bool,
18    pub nonegate: bool,
19    pub noparen: bool,
20    pub unescape: bool,
21}
22
23#[derive(Debug, Clone, Serialize, PartialEq)]
24#[serde(rename_all = "camelCase")]
25pub struct ScanToken {
26    pub value: String,
27    pub depth: f64,
28    pub is_glob: bool,
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub is_globstar: Option<bool>,
31    #[serde(skip_serializing_if = "Option::is_none")]
32    pub is_brace: Option<bool>,
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub is_bracket: Option<bool>,
35    #[serde(skip_serializing_if = "Option::is_none")]
36    pub is_extglob: Option<bool>,
37    #[serde(skip_serializing_if = "Option::is_none")]
38    pub negated: Option<bool>,
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub backslashes: Option<bool>,
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub is_prefix: Option<bool>,
43}
44
45impl Default for ScanToken {
46    fn default() -> Self {
47        Self {
48            value: String::new(),
49            depth: 0.0,
50            is_glob: false,
51            is_globstar: None,
52            is_brace: None,
53            is_bracket: None,
54            is_extglob: None,
55            negated: None,
56            backslashes: None,
57            is_prefix: None,
58        }
59    }
60}
61
62#[derive(Debug, Clone, Serialize, PartialEq)]
63#[serde(rename_all = "camelCase")]
64pub struct ScanState {
65    pub prefix: String,
66    pub input: String,
67    pub start: usize,
68    pub base: String,
69    pub glob: String,
70    pub is_brace: bool,
71    pub is_bracket: bool,
72    pub is_glob: bool,
73    pub is_extglob: bool,
74    pub is_globstar: bool,
75    pub negated: bool,
76    pub negated_extglob: bool,
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub max_depth: Option<f64>,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    pub tokens: Option<Vec<ScanToken>>,
81    #[serde(skip_serializing_if = "Option::is_none")]
82    pub slashes: Option<Vec<usize>>,
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub parts: Option<Vec<String>>,
85}
86
/// Character-indexed view over a string slice.
///
/// Keeps the decoded characters next to their byte offsets so callers can
/// address the text by character position and still slice the underlying
/// UTF-8 data on valid boundaries.
struct InputView<'a> {
    /// The borrowed source text.
    raw: &'a str,
    /// Every character of `raw`, in order.
    chars: Vec<char>,
    /// Byte offset of each character, followed by `raw.len()` as a sentinel.
    offsets: Vec<usize>,
}

impl<'a> InputView<'a> {
    /// Builds the view by decoding `raw` once.
    fn new(raw: &'a str) -> Self {
        let (mut offsets, chars): (Vec<usize>, Vec<char>) = raw.char_indices().unzip();
        // The sentinel lets `slice` use an end index equal to the character count.
        offsets.push(raw.len());
        Self {
            raw,
            chars,
            offsets,
        }
    }

    /// Number of characters (not bytes) in the view.
    fn len(&self) -> usize {
        self.chars.len()
    }

    /// Character at `index`, or `None` when `index` is past the end.
    fn char_at(&self, index: usize) -> Option<char> {
        self.chars.get(index).copied()
    }

    /// Owned copy of the characters in `start..end`.
    ///
    /// Returns an empty string when `start >= end`. Otherwise both indices
    /// must be at most `len()`; larger values panic on the offset lookup.
    fn slice(&self, start: usize, end: usize) -> String {
        if start < end {
            let bytes = self.offsets[start]..self.offsets[end];
            self.raw[bytes].to_owned()
        } else {
            String::new()
        }
    }

    /// Owned copy of everything from character `start` to the end.
    fn slice_from(&self, start: usize) -> String {
        self.slice(start, self.len())
    }
}
124
125fn depth(token: &mut ScanToken) {
126    if token.is_prefix != Some(true) {
127        token.depth = if token.is_globstar == Some(true) {
128            f64::INFINITY
129        } else {
130            1.0
131        };
132    }
133}
134
135fn is_extglob_char(ch: char) -> bool {
136    matches!(
137        ch,
138        CHAR_PLUS | CHAR_AT | CHAR_ASTERISK | CHAR_QUESTION_MARK | CHAR_EXCLAMATION_MARK
139    )
140}
141
142pub fn scan(input: &str, options: &ScanOptions) -> ScanState {
143    let view = InputView::new(input);
144    let scan_to_end = options.parts || options.scan_to_end;
145    let mut slashes = Vec::new();
146    let mut tokens = Vec::new();
147    let mut parts = Vec::new();
148
149    let length = view.len().saturating_sub(1);
150    let mut index: isize = -1;
151    let mut start = 0usize;
152    let mut last_index = 0usize;
153    let mut is_brace = false;
154    let mut is_bracket = false;
155    let mut is_glob = false;
156    let mut is_extglob = false;
157    let mut is_globstar = false;
158    let mut brace_escaped = false;
159    let mut backslashes = false;
160    let mut negated = false;
161    let mut negated_extglob = false;
162    let mut finished = false;
163    let mut braces = 0usize;
164    let mut prev = '\0';
165    let mut code = '\0';
166    let mut token = ScanToken::default();
167
168    let eos = |idx: isize| idx >= length as isize;
169    let peek = |idx: isize| view.char_at((idx + 1) as usize);
170    let next_char = |idx: &mut isize, prev_code: &mut char, current_code: char| -> Option<char> {
171        *prev_code = current_code;
172        *idx += 1;
173        view.char_at(*idx as usize)
174    };
175
176    while index < length as isize {
177        let Some(current) = next_char(&mut index, &mut prev, code) else {
178            break;
179        };
180        code = current;
181
182        if code == CHAR_BACKWARD_SLASH {
183            backslashes = true;
184            token.backslashes = Some(true);
185
186            let Some(next) = next_char(&mut index, &mut prev, code) else {
187                break;
188            };
189            code = next;
190
191            if code == CHAR_LEFT_CURLY_BRACE {
192                brace_escaped = true;
193            }
194            continue;
195        }
196
197        if brace_escaped || code == CHAR_LEFT_CURLY_BRACE {
198            braces += 1;
199
200            while !eos(index) {
201                let Some(next) = next_char(&mut index, &mut prev, code) else {
202                    break;
203                };
204                code = next;
205
206                if code == CHAR_BACKWARD_SLASH {
207                    backslashes = true;
208                    token.backslashes = Some(true);
209                    if let Some(escaped) = next_char(&mut index, &mut prev, code) {
210                        code = escaped;
211                    }
212                    continue;
213                }
214
215                if code == CHAR_LEFT_CURLY_BRACE {
216                    braces += 1;
217                    continue;
218                }
219
220                if !brace_escaped && code == CHAR_DOT {
221                    if let Some(next_code) = next_char(&mut index, &mut prev, code) {
222                        code = next_code;
223                        if code == CHAR_DOT {
224                            is_brace = true;
225                            token.is_brace = Some(true);
226                            is_glob = true;
227                            token.is_glob = true;
228                            finished = true;
229
230                            if scan_to_end {
231                                continue;
232                            }
233
234                            break;
235                        }
236                    } else {
237                        break;
238                    }
239                }
240
241                if !brace_escaped && code == CHAR_COMMA {
242                    is_brace = true;
243                    token.is_brace = Some(true);
244                    is_glob = true;
245                    token.is_glob = true;
246                    finished = true;
247
248                    if scan_to_end {
249                        continue;
250                    }
251
252                    break;
253                }
254
255                if code == CHAR_RIGHT_CURLY_BRACE {
256                    braces = braces.saturating_sub(1);
257
258                    if braces == 0 {
259                        brace_escaped = false;
260                        is_brace = true;
261                        token.is_brace = Some(true);
262                        finished = true;
263                        break;
264                    }
265                }
266            }
267
268            if scan_to_end {
269                continue;
270            }
271
272            break;
273        }
274
275        if code == CHAR_FORWARD_SLASH {
276            slashes.push(index as usize);
277            tokens.push(token);
278            token = ScanToken::default();
279
280            if finished {
281                continue;
282            }
283
284            if prev == CHAR_DOT && index as usize == start + 1 {
285                start += 2;
286                continue;
287            }
288
289            last_index = index as usize + 1;
290            continue;
291        }
292
293        if !options.noext && is_extglob_char(code) && peek(index) == Some(CHAR_LEFT_PARENTHESES) {
294            is_glob = true;
295            token.is_glob = true;
296            is_extglob = true;
297            token.is_extglob = Some(true);
298            finished = true;
299
300            if code == CHAR_EXCLAMATION_MARK && index as usize == start {
301                negated_extglob = true;
302            }
303
304            if scan_to_end {
305                while !eos(index) {
306                    let Some(next) = next_char(&mut index, &mut prev, code) else {
307                        break;
308                    };
309                    code = next;
310
311                    if code == CHAR_BACKWARD_SLASH {
312                        backslashes = true;
313                        token.backslashes = Some(true);
314                        if let Some(escaped) = next_char(&mut index, &mut prev, code) {
315                            code = escaped;
316                        }
317                        continue;
318                    }
319
320                    if code == CHAR_RIGHT_PARENTHESES {
321                        is_glob = true;
322                        token.is_glob = true;
323                        finished = true;
324                        break;
325                    }
326                }
327                continue;
328            }
329
330            break;
331        }
332
333        if code == CHAR_ASTERISK {
334            if prev == CHAR_ASTERISK {
335                is_globstar = true;
336                token.is_globstar = Some(true);
337            }
338            is_glob = true;
339            token.is_glob = true;
340            finished = true;
341
342            if scan_to_end {
343                continue;
344            }
345            break;
346        }
347
348        if code == CHAR_QUESTION_MARK {
349            is_glob = true;
350            token.is_glob = true;
351            finished = true;
352
353            if scan_to_end {
354                continue;
355            }
356            break;
357        }
358
359        if code == CHAR_LEFT_SQUARE_BRACKET {
360            while !eos(index) {
361                let Some(next) = next_char(&mut index, &mut prev, code) else {
362                    break;
363                };
364
365                if next == CHAR_BACKWARD_SLASH {
366                    backslashes = true;
367                    token.backslashes = Some(true);
368                    let _ = next_char(&mut index, &mut prev, next);
369                    continue;
370                }
371
372                if next == CHAR_RIGHT_SQUARE_BRACKET {
373                    is_bracket = true;
374                    token.is_bracket = Some(true);
375                    is_glob = true;
376                    token.is_glob = true;
377                    finished = true;
378                    break;
379                }
380            }
381
382            if scan_to_end {
383                continue;
384            }
385
386            break;
387        }
388
389        if !options.nonegate && code == CHAR_EXCLAMATION_MARK && index as usize == start {
390            negated = true;
391            token.negated = Some(true);
392            start += 1;
393            continue;
394        }
395
396        if !options.noparen && code == CHAR_LEFT_PARENTHESES {
397            is_glob = true;
398            token.is_glob = true;
399
400            if scan_to_end {
401                while !eos(index) {
402                    let Some(next) = next_char(&mut index, &mut prev, code) else {
403                        break;
404                    };
405                    code = next;
406
407                    if code == CHAR_LEFT_PARENTHESES {
408                        backslashes = true;
409                        token.backslashes = Some(true);
410                        if let Some(escaped) = next_char(&mut index, &mut prev, code) {
411                            code = escaped;
412                        }
413                        continue;
414                    }
415
416                    if code == CHAR_RIGHT_PARENTHESES {
417                        finished = true;
418                        break;
419                    }
420                }
421                continue;
422            }
423            break;
424        }
425
426        if is_glob {
427            finished = true;
428
429            if scan_to_end {
430                continue;
431            }
432
433            break;
434        }
435    }
436
437    if options.noext {
438        is_extglob = false;
439        is_glob = false;
440    }
441
442    let mut str = input.to_string();
443    let mut prefix = String::new();
444    let mut base;
445    let mut glob = String::new();
446
447    if start > 0 {
448        prefix = view.slice(0, start);
449        str = view.slice_from(start);
450        last_index = last_index.saturating_sub(start);
451    }
452
453    base = str.clone();
454
455    let str_view = InputView::new(&str);
456    if !base.is_empty() && is_glob && last_index > 0 {
457        base = str_view.slice(0, last_index);
458        glob = str_view.slice_from(last_index);
459    } else if is_glob {
460        base.clear();
461        glob = str.clone();
462    }
463
464    if !base.is_empty() && base != "/" && base != str {
465        if base.chars().last().is_some_and(is_path_separator) {
466            base.pop();
467        }
468    }
469
470    if options.unescape {
471        if !glob.is_empty() {
472            glob = remove_backslashes(&glob);
473        }
474
475        if !base.is_empty() && backslashes {
476            base = remove_backslashes(&base);
477        }
478    }
479
480    let mut state = ScanState {
481        prefix,
482        input: input.to_string(),
483        start,
484        base,
485        glob,
486        is_brace,
487        is_bracket,
488        is_glob,
489        is_extglob,
490        is_globstar,
491        negated,
492        negated_extglob,
493        max_depth: None,
494        tokens: None,
495        slashes: None,
496        parts: None,
497    };
498
499    if options.tokens {
500        state.max_depth = Some(0.0);
501        if !is_path_separator(code) {
502            tokens.push(token);
503        }
504    }
505
506    if options.parts || options.tokens {
507        let mut prev_index: Option<usize> = None;
508
509        for (idx, slash_index) in slashes.iter().copied().enumerate() {
510            let n = match prev_index {
511                Some(prev_index_value) if prev_index_value != 0 => prev_index_value + 1,
512                _ => start,
513            };
514            let value = view.slice(n, slash_index);
515
516            if options.tokens {
517                if idx == 0 && start != 0 {
518                    tokens[idx].is_prefix = Some(true);
519                    tokens[idx].value = state.prefix.clone();
520                } else {
521                    tokens[idx].value = value.clone();
522                }
523                depth(&mut tokens[idx]);
524                if let Some(max_depth) = state.max_depth.as_mut() {
525                    *max_depth += tokens[idx].depth;
526                }
527            }
528
529            if idx != 0 || !value.is_empty() {
530                parts.push(value);
531            }
532            prev_index = Some(slash_index);
533        }
534
535        if let Some(prev_index_value) = prev_index {
536            if prev_index_value != 0 && prev_index_value + 1 < view.len() {
537                let value = view.slice(prev_index_value + 1, view.len());
538                parts.push(value.clone());
539
540                if options.tokens {
541                    let last = tokens.len() - 1;
542                    tokens[last].value = value;
543                    depth(&mut tokens[last]);
544                    if let Some(max_depth) = state.max_depth.as_mut() {
545                        *max_depth += tokens[last].depth;
546                    }
547                }
548            }
549        }
550
551        state.slashes = Some(slashes);
552        state.parts = Some(parts);
553    }
554
555    if options.tokens {
556        state.tokens = Some(tokens);
557    }
558
559    state
560}