/// Opcode identifiers for pattern-program nodes.
///
/// The enum is `#[repr(u8)]` with explicit discriminants, so a variant can be
/// turned into its byte value with `as u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PatOp {
    End = 0x00,
    ExcSync = 0x01,
    ExcEnd = 0x02,
    Back = 0x03,
    Exactly = 0x04,
    Nothing = 0x05,
    OneHash = 0x06,
    TwoHash = 0x07,
    GFlags = 0x08,
    IsStart = 0x09,
    IsEnd = 0x0a,
    CountStart = 0x0b,
    Count = 0x0c,
    Branch = 0x20,
    WBranch = 0x21,
    Exclude = 0x30,
    ExcludP = 0x31,
    Any = 0x40,
    AnyOf = 0x41,
    AnyBut = 0x42,
    Star = 0x43,
    NumRng = 0x44,
    NumFrom = 0x45,
    NumTo = 0x46,
    NumAny = 0x47,
    Open = 0x80,
    Close = 0x90,
}
46
/// Number of capture-group slots kept by a matcher; groups numbered above
/// this are compiled but their positions are not recorded.
const NSUBEXP: usize = 9;
49
/// Compile-time options attached to a pattern program.
#[derive(Debug, Clone, Copy, Default)]
pub struct PatFlags {
    /// When set, the matcher's `*` and `?` never match a `/`.
    pub file: bool,
    /// Not read anywhere in this file. NOTE(review): semantics unconfirmed.
    pub any: bool,
    /// When set, the `End` node succeeds even if input remains.
    pub noanch: bool,
    /// When set, non-literal patterns refuse input that starts with `.`.
    pub nogld: bool,
    /// Set by the compiler when the pattern has no pattern characters.
    pub pures: bool,
    /// Not read anywhere in this file. NOTE(review): semantics unconfirmed.
    pub scan: bool,
    /// Not read anywhere in this file. NOTE(review): semantics unconfirmed.
    pub lcmatchuc: bool,
}
61
/// Matching options that can be switched by `(#...)` globbing flags.
#[derive(Debug, Clone, Copy, Default)]
pub struct GlobFlags {
    /// ASCII case-insensitive comparison of literals and character sets.
    pub igncase: bool,
    /// Set by the `l` flag letter; not consulted by the matcher in this file.
    pub lcmatchuc: bool,
    /// Toggled by the `m` / `M` flag letters.
    pub matchref: bool,
    /// Toggled by the `b` / `B` flag letters.
    pub backref: bool,
    /// Toggled by the `u` / `U` flag letters.
    pub multibyte: bool,
    /// Count parsed after the `a` flag letter.
    pub approx: u8,
}
72
/// A compiled pattern, as produced by `PatCompiler::compile`.
#[derive(Debug, Clone)]
pub struct PatProg {
    /// Node list the matcher walks.
    code: Vec<PatNode>,
    /// Flags the pattern was compiled with (`pures` is set for literals).
    pub flags: PatFlags,
    /// Glob flags the matcher starts with.
    pub glob_start: GlobFlags,
    /// Copy of the compiler's glob flags at the end of compilation.
    pub glob_end: GlobFlags,
    /// Number of parenthesised groups seen while compiling.
    pub npar: usize,
    /// First character of a leading literal, if the pattern starts with one.
    pub start_char: Option<char>,
    /// The whole pattern, when it contains no pattern characters.
    pub pure_string: Option<String>,
}
91
92#[derive(Debug, Clone)]
94pub enum PatNode {
95 End,
96 ExcSync,
97 ExcEnd,
98 Back(usize), Exactly(String), Nothing,
101 OneHash(Box<PatNode>), TwoHash(Box<PatNode>), GFlags(GlobFlags),
104 IsStart,
105 IsEnd,
106 CountStart,
107 Count {
108 min: u32,
109 max: Option<u32>,
110 node: Box<PatNode>,
111 },
112 Branch(Vec<PatNode>, usize), WBranch(Vec<PatNode>),
114 Exclude(Vec<PatNode>),
115 ExcludP(Vec<PatNode>),
116 Any, AnyOf(Vec<char>), AnyBut(Vec<char>), Star, NumRng(i64, i64), NumFrom(i64), NumTo(i64), NumAny, Open(usize), Close(usize), Sequence(Vec<PatNode>), }
128
/// Recursive-descent compiler that turns pattern text into `PatNode`s.
struct PatCompiler<'a> {
    /// Pattern text being compiled.
    input: &'a str,
    /// Byte offset of the next unread character in `input`.
    pos: usize,
    /// Flags copied into the resulting program.
    flags: PatFlags,
    /// Glob flags copied into the program's `glob_start` / `glob_end`.
    glob_flags: GlobFlags,
    /// Count of `(`-groups opened so far.
    npar: usize,
    /// Enables `#`, `^`, `~` and `<...>` syntax.
    extended_glob: bool,
    /// Enables `*(...)`, `?(...)`, `+(...)`, `!(...)`, `@(...)` syntax.
    ksh_glob: bool,
}
139
140impl<'a> PatCompiler<'a> {
141 fn new(input: &'a str, flags: PatFlags) -> Self {
142 PatCompiler {
143 input,
144 pos: 0,
145 flags,
146 glob_flags: GlobFlags::default(),
147 npar: 0,
148 extended_glob: true,
149 ksh_glob: true,
150 }
151 }
152
153 fn with_options(mut self, extended: bool, ksh: bool) -> Self {
154 self.extended_glob = extended;
155 self.ksh_glob = ksh;
156 self
157 }
158
159 fn with_igncase(mut self, igncase: bool) -> Self {
160 self.glob_flags.igncase = igncase;
161 self
162 }
163
164 fn peek(&self) -> Option<char> {
165 self.input[self.pos..].chars().next()
166 }
167
168 fn peek_n(&self, n: usize) -> Option<char> {
169 self.input[self.pos..].chars().nth(n)
170 }
171
172 fn advance(&mut self) -> Option<char> {
173 let c = self.peek()?;
174 self.pos += c.len_utf8();
175 Some(c)
176 }
177
178 fn at_end(&self) -> bool {
179 self.pos >= self.input.len()
180 }
181
182 fn compile(mut self) -> Result<PatProg, String> {
183 if !self.has_pattern_chars() {
185 return Ok(PatProg {
186 code: vec![PatNode::Exactly(self.input.to_string()), PatNode::End],
187 flags: PatFlags {
188 pures: true,
189 ..self.flags
190 },
191 glob_start: self.glob_flags,
192 glob_end: self.glob_flags,
193 npar: 0,
194 start_char: self.input.chars().next(),
195 pure_string: Some(self.input.to_string()),
196 });
197 }
198
199 let nodes = self.compile_branch()?;
200 let start_char = self.find_start_char(&nodes);
201
202 Ok(PatProg {
203 code: nodes,
204 flags: self.flags,
205 glob_start: self.glob_flags,
206 glob_end: self.glob_flags,
207 npar: self.npar,
208 start_char,
209 pure_string: None,
210 })
211 }
212
213 fn has_pattern_chars(&self) -> bool {
214 for c in self.input.chars() {
215 match c {
216 '*' | '?' | '[' | '\\' => return true,
217 '#' | '^' | '~' if self.extended_glob => return true,
218 '(' | ')' | '|' if self.ksh_glob => return true,
219 '<' | '>' if self.extended_glob => return true,
220 _ => {}
221 }
222 }
223 false
224 }
225
226 fn find_start_char(&self, nodes: &[PatNode]) -> Option<char> {
227 match nodes.first()? {
228 PatNode::Exactly(s) => s.chars().next(),
229 PatNode::Sequence(seq) => {
230 if let Some(PatNode::Exactly(s)) = seq.first() {
231 s.chars().next()
232 } else {
233 None
234 }
235 }
236 _ => None,
237 }
238 }
239
240 fn compile_branch(&mut self) -> Result<Vec<PatNode>, String> {
241 self.compile_branch_inner(true)
242 }
243
244 fn compile_branch_inner(&mut self, add_end: bool) -> Result<Vec<PatNode>, String> {
245 let mut nodes = Vec::new();
246 let mut alternatives: Vec<Vec<PatNode>> = Vec::new();
247
248 loop {
249 let node = self.compile_piece()?;
250 if let Some(n) = node {
251 nodes.push(n);
252 }
253
254 if self.at_end() {
255 break;
256 }
257
258 match self.peek() {
259 Some('|') => {
260 self.advance();
261 alternatives.push(std::mem::take(&mut nodes));
262 }
263 Some(')') => break,
264 None => break,
265 _ => {}
266 }
267 }
268
269 if !alternatives.is_empty() {
270 alternatives.push(nodes);
271 Ok(vec![PatNode::Branch(
272 alternatives.into_iter().flatten().collect(),
273 0,
274 )])
275 } else {
276 if add_end {
277 nodes.push(PatNode::End);
278 }
279 Ok(nodes)
280 }
281 }
282
283 fn compile_piece(&mut self) -> Result<Option<PatNode>, String> {
284 let Some(c) = self.peek() else {
285 return Ok(None);
286 };
287
288 let node = match c {
289 '*' => {
290 self.advance();
291 if self.ksh_glob && self.peek() == Some('(') {
293 self.advance();
294 let inner = self.compile_branch_inner(false)?;
295 if self.peek() != Some(')') {
296 return Err("missing ) in *(...)".to_string());
297 }
298 self.advance();
299 PatNode::OneHash(Box::new(PatNode::Sequence(inner)))
300 } else {
301 PatNode::Star
302 }
303 }
304 '?' => {
305 self.advance();
306 if self.ksh_glob && self.peek() == Some('(') {
308 self.advance();
309 let inner = self.compile_branch_inner(false)?;
310 if self.peek() != Some(')') {
311 return Err("missing ) in ?(...)".to_string());
312 }
313 self.advance();
314 PatNode::Branch(vec![PatNode::Sequence(inner), PatNode::Nothing], 0)
316 } else {
317 PatNode::Any
318 }
319 }
320 '[' => self.compile_bracket()?,
321 '\\' => {
322 self.advance();
323 if let Some(escaped) = self.advance() {
324 PatNode::Exactly(escaped.to_string())
325 } else {
326 PatNode::Exactly("\\".to_string())
327 }
328 }
329 '#' if self.extended_glob => {
330 self.advance();
331 if self.peek() == Some('#') {
333 self.advance();
334 return Ok(Some(PatNode::TwoHash(Box::new(PatNode::Any))));
336 }
337 PatNode::OneHash(Box::new(PatNode::Any))
339 }
340 '<' if self.extended_glob => self.compile_numeric_range()?,
341 '(' => {
342 self.advance();
343 self.npar += 1;
344 let group_num = self.npar;
345 let inner = self.compile_branch_inner(false)?;
346 if self.peek() != Some(')') {
347 return Err("missing )".to_string());
348 }
349 self.advance();
350 PatNode::Sequence(vec![
351 PatNode::Open(group_num),
352 PatNode::Sequence(inner),
353 PatNode::Close(group_num),
354 ])
355 }
356 ')' | '|' => return Ok(None),
357 '+' if self.ksh_glob && self.peek_n(1) == Some('(') => {
358 self.advance(); self.advance(); let inner = self.compile_branch_inner(false)?;
361 if self.peek() != Some(')') {
362 return Err("missing ) in +(...)".to_string());
363 }
364 self.advance();
365 PatNode::TwoHash(Box::new(PatNode::Sequence(inner)))
366 }
367 '!' if self.ksh_glob && self.peek_n(1) == Some('(') => {
368 self.advance(); self.advance(); let inner = self.compile_branch_inner(false)?;
371 if self.peek() != Some(')') {
372 return Err("missing ) in !(...)".to_string());
373 }
374 self.advance();
375 PatNode::Exclude(inner)
376 }
377 '@' if self.ksh_glob && self.peek_n(1) == Some('(') => {
378 self.advance(); self.advance(); let inner = self.compile_branch_inner(false)?;
381 if self.peek() != Some(')') {
382 return Err("missing ) in @(...)".to_string());
383 }
384 self.advance();
385 PatNode::Sequence(inner)
386 }
387 '^' if self.extended_glob => {
388 self.advance();
389 let inner = self.compile_piece()?;
391 if let Some(node) = inner {
392 PatNode::Exclude(vec![node])
393 } else {
394 return Err("^ requires pattern".to_string());
395 }
396 }
397 '~' if self.extended_glob => {
398 self.advance();
399 let inner = self.compile_piece()?;
401 if let Some(node) = inner {
402 PatNode::Exclude(vec![node])
403 } else {
404 return Err("~ requires pattern".to_string());
405 }
406 }
407 _ => {
408 let mut literal = String::new();
410 while let Some(ch) = self.peek() {
411 if self.is_special(ch) {
412 break;
413 }
414 literal.push(ch);
415 self.advance();
416 }
417 if literal.is_empty() {
418 return Ok(None);
419 }
420 PatNode::Exactly(literal)
421 }
422 };
423
424 if self.extended_glob {
426 match self.peek() {
427 Some('#') => {
428 self.advance();
429 if self.peek() == Some('#') {
430 self.advance();
431 return Ok(Some(PatNode::TwoHash(Box::new(node))));
432 }
433 return Ok(Some(PatNode::OneHash(Box::new(node))));
434 }
435 _ => {}
436 }
437 }
438
439 Ok(Some(node))
440 }
441
442 fn is_special(&self, c: char) -> bool {
443 matches!(c, '*' | '?' | '[' | '\\' | '(' | ')' | '|')
444 || (self.extended_glob && matches!(c, '#' | '^' | '~' | '<'))
445 || (self.ksh_glob && matches!(c, '+' | '!' | '@') && self.peek_n(1) == Some('('))
446 }
447
448 fn compile_bracket(&mut self) -> Result<PatNode, String> {
449 self.advance(); let negated = matches!(self.peek(), Some('!' | '^'));
452 if negated {
453 self.advance();
454 }
455
456 let mut chars = Vec::new();
457
458 if self.peek() == Some(']') {
460 chars.push(']');
461 self.advance();
462 }
463
464 while let Some(c) = self.peek() {
465 if c == ']' {
466 self.advance();
467 break;
468 }
469
470 if c == '\\' {
471 self.advance();
472 if let Some(escaped) = self.advance() {
473 chars.push(escaped);
474 }
475 continue;
476 }
477
478 if c == '[' && self.peek_n(1) == Some(':') {
480 if let Some(class_chars) = self.parse_posix_class() {
481 chars.extend(class_chars);
482 continue;
483 }
484 }
485
486 self.advance();
487
488 if self.peek() == Some('-') && self.peek_n(1) != Some(']') {
490 self.advance(); if let Some(end) = self.advance() {
492 for ch in c..=end {
493 chars.push(ch);
494 }
495 continue;
496 }
497 }
498
499 chars.push(c);
500 }
501
502 if negated {
503 Ok(PatNode::AnyBut(chars))
504 } else {
505 Ok(PatNode::AnyOf(chars))
506 }
507 }
508
509 fn parse_posix_class(&mut self) -> Option<Vec<char>> {
510 let start = self.pos;
511 self.advance(); self.advance(); let mut class_name = String::new();
515 while let Some(c) = self.peek() {
516 if c == ':' {
517 break;
518 }
519 class_name.push(c);
520 self.advance();
521 }
522
523 if self.peek() != Some(':') || self.peek_n(1) != Some(']') {
524 self.pos = start;
525 return None;
526 }
527 self.advance(); self.advance(); let chars: Vec<char> = match class_name.as_str() {
531 "alpha" => ('a'..='z').chain('A'..='Z').collect(),
532 "digit" => ('0'..='9').collect(),
533 "alnum" => ('a'..='z').chain('A'..='Z').chain('0'..='9').collect(),
534 "space" => vec![' ', '\t', '\n', '\r', '\x0b', '\x0c'],
535 "upper" => ('A'..='Z').collect(),
536 "lower" => ('a'..='z').collect(),
537 "punct" => "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~".chars().collect(),
538 "xdigit" => ('0'..='9').chain('a'..='f').chain('A'..='F').collect(),
539 "blank" => vec![' ', '\t'],
540 "cntrl" => (0u8..=31)
541 .map(|b| b as char)
542 .chain(std::iter::once(127 as char))
543 .collect(),
544 "graph" | "print" => (33u8..=126).map(|b| b as char).collect(),
545 "word" => ('a'..='z')
546 .chain('A'..='Z')
547 .chain('0'..='9')
548 .chain(std::iter::once('_'))
549 .collect(),
550 _ => return None,
551 };
552
553 Some(chars)
554 }
555
556 fn compile_numeric_range(&mut self) -> Result<PatNode, String> {
557 self.advance(); let mut from_str = String::new();
560 let mut to_str = String::new();
561 let mut in_to = false;
562
563 while let Some(c) = self.peek() {
564 if c == '>' {
565 self.advance();
566 break;
567 }
568 if c == '-' {
569 self.advance();
570 in_to = true;
571 continue;
572 }
573 if c.is_ascii_digit() {
574 if in_to {
575 to_str.push(c);
576 } else {
577 from_str.push(c);
578 }
579 self.advance();
580 } else {
581 return Err(format!("invalid character in numeric range: {}", c));
582 }
583 }
584
585 let from: Option<i64> = if from_str.is_empty() {
586 None
587 } else {
588 from_str.parse().ok()
589 };
590 let to: Option<i64> = if to_str.is_empty() {
591 None
592 } else {
593 to_str.parse().ok()
594 };
595
596 match (from, to) {
597 (Some(f), Some(t)) => Ok(PatNode::NumRng(f, t)),
598 (Some(f), None) => Ok(PatNode::NumFrom(f)),
599 (None, Some(t)) => Ok(PatNode::NumTo(t)),
600 (None, None) => Ok(PatNode::NumAny),
601 }
602 }
603}
604
/// Matching state for running one `PatProg` against one input string.
pub struct PatMatcher<'a> {
    /// Program being executed.
    prog: &'a PatProg,
    /// Text being matched.
    input: &'a str,
    /// Current byte offset into `input`.
    pos: usize,
    /// Active glob flags; starts as `prog.glob_start`, replaced by `GFlags` nodes.
    glob_flags: GlobFlags,
    /// `(start, end)` byte offsets for capture groups 1..=NSUBEXP.
    captures: [(usize, usize); NSUBEXP],
    /// Bit `n - 1` is set once group `n` has been opened.
    captures_set: u16,
    /// Initialised to 0 and not otherwise used in this file.
    errors_found: u32,
}
617
618impl<'a> PatMatcher<'a> {
619 pub fn new(prog: &'a PatProg, input: &'a str) -> Self {
620 PatMatcher {
621 prog,
622 input,
623 pos: 0,
624 glob_flags: prog.glob_start,
625 captures: [(0, 0); NSUBEXP],
626 captures_set: 0,
627 errors_found: 0,
628 }
629 }
630
631 pub fn try_match(&mut self) -> bool {
633 if let Some(ref pure) = self.prog.pure_string {
635 if self.glob_flags.igncase {
636 return self.input.eq_ignore_ascii_case(pure);
637 }
638 return self.input == pure;
639 }
640
641 if self.prog.flags.nogld && self.input.starts_with('.') {
643 return false;
644 }
645
646 self.match_nodes_at(&self.prog.code.clone(), 0)
647 }
648
649 fn match_nodes_at(&mut self, nodes: &[PatNode], start_idx: usize) -> bool {
650 let mut idx = start_idx;
651 while idx < nodes.len() {
652 let node = &nodes[idx];
653
654 if matches!(node, PatNode::Star) {
656 if idx + 1 >= nodes.len() {
658 self.pos = self.input.len();
659 return true;
660 }
661
662 let save_pos = self.pos;
664 let end_pos = if self.prog.flags.file {
665 self.input[self.pos..]
666 .find('/')
667 .map(|i| self.pos + i)
668 .unwrap_or(self.input.len())
669 } else {
670 self.input.len()
671 };
672
673 for try_pos in save_pos..=end_pos {
675 self.pos = try_pos;
676 if self.match_nodes_at(nodes, idx + 1) {
677 return true;
678 }
679 }
680 self.pos = save_pos;
681 return false;
682 }
683
684 if !self.match_node(node) {
685 return false;
686 }
687 idx += 1;
688 }
689 true
690 }
691
692 fn match_node(&mut self, node: &PatNode) -> bool {
693 match node {
694 PatNode::End => {
695 self.pos >= self.input.len() || self.prog.flags.noanch
698 }
699
700 PatNode::Exactly(s) => {
701 let remaining = &self.input[self.pos..];
702 if self.glob_flags.igncase {
703 if remaining.len() >= s.len() && remaining[..s.len()].eq_ignore_ascii_case(s) {
704 self.pos += s.len();
705 true
706 } else {
707 false
708 }
709 } else if remaining.starts_with(s) {
710 self.pos += s.len();
711 true
712 } else {
713 false
714 }
715 }
716
717 PatNode::Nothing => true,
718
719 PatNode::Any => {
720 if self.pos < self.input.len() {
721 let c = self.current_char();
722 if self.prog.flags.file && c == '/' {
724 return false;
725 }
726 self.pos += c.len_utf8();
727 true
728 } else {
729 false
730 }
731 }
732
733 PatNode::Star => {
734 if self.prog.flags.file {
738 if let Some(slash_pos) = self.input[self.pos..].find('/') {
739 self.pos += slash_pos;
740 } else {
741 self.pos = self.input.len();
742 }
743 } else {
744 self.pos = self.input.len();
745 }
746 true
747 }
748
749 PatNode::AnyOf(chars) => {
750 if self.pos >= self.input.len() {
751 return false;
752 }
753 let c = self.current_char();
754 let matched = if self.glob_flags.igncase {
755 chars.iter().any(|&ch| ch.eq_ignore_ascii_case(&c))
756 } else {
757 chars.contains(&c)
758 };
759 if matched {
760 self.pos += c.len_utf8();
761 true
762 } else {
763 false
764 }
765 }
766
767 PatNode::AnyBut(chars) => {
768 if self.pos >= self.input.len() {
769 return false;
770 }
771 let c = self.current_char();
772 let in_set = if self.glob_flags.igncase {
773 chars.iter().any(|&ch| ch.eq_ignore_ascii_case(&c))
774 } else {
775 chars.contains(&c)
776 };
777 if !in_set {
778 self.pos += c.len_utf8();
779 true
780 } else {
781 false
782 }
783 }
784
785 PatNode::Branch(alts, _) => {
786 let save_pos = self.pos;
787 for alt in alts {
789 self.pos = save_pos;
790 if self.match_node(alt) {
791 return true;
792 }
793 }
794 self.pos = save_pos;
795 false
796 }
797
798 PatNode::Sequence(nodes) => self.match_nodes_at(nodes, 0),
799
800 PatNode::OneHash(inner) => {
801 loop {
803 let save_pos = self.pos;
804 if !self.match_single_node(inner) {
805 self.pos = save_pos;
806 break;
807 }
808 if self.pos == save_pos {
810 break;
811 }
812 }
813 true
814 }
815
816 PatNode::TwoHash(inner) => {
817 if !self.match_single_node(inner) {
819 return false;
820 }
821 loop {
822 let save_pos = self.pos;
823 if !self.match_single_node(inner) {
824 self.pos = save_pos;
825 break;
826 }
827 if self.pos == save_pos {
828 break;
829 }
830 }
831 true
832 }
833
834 PatNode::Count { min, max, node } => {
835 let mut count = 0u32;
836 loop {
837 if let Some(m) = max {
838 if count >= *m {
839 break;
840 }
841 }
842 let save_pos = self.pos;
843 if !self.match_node(node) {
844 self.pos = save_pos;
845 break;
846 }
847 if self.pos == save_pos {
848 break;
849 }
850 count += 1;
851 }
852 count >= *min
853 }
854
855 PatNode::Open(n) => {
856 if *n > 0 && *n <= NSUBEXP {
857 self.captures[n - 1].0 = self.pos;
858 self.captures_set |= 1 << (n - 1);
859 }
860 true
861 }
862
863 PatNode::Close(n) => {
864 if *n > 0 && *n <= NSUBEXP {
865 self.captures[n - 1].1 = self.pos;
866 }
867 true
868 }
869
870 PatNode::NumRng(from, to) => self.match_number(Some(*from), Some(*to)),
871
872 PatNode::NumFrom(from) => self.match_number(Some(*from), None),
873
874 PatNode::NumTo(to) => self.match_number(None, Some(*to)),
875
876 PatNode::NumAny => self.match_number(None, None),
877
878 PatNode::IsStart => self.pos == 0,
879
880 PatNode::IsEnd => self.pos >= self.input.len(),
881
882 PatNode::GFlags(flags) => {
883 self.glob_flags = *flags;
884 true
885 }
886
887 PatNode::Exclude(inner) => {
888 let save_pos = self.pos;
890 let matched = self.match_nodes_at(inner, 0);
891 self.pos = save_pos;
892 !matched
893 }
894
895 PatNode::ExcludP(inner) => {
896 let save_pos = self.pos;
897 let matched = self.match_nodes_at(inner, 0);
898 self.pos = save_pos;
899 !matched
900 }
901
902 PatNode::WBranch(alts) => {
903 let save_pos = self.pos;
905 for alt in alts {
906 self.pos = save_pos;
907 if self.match_node(alt) && self.pos > save_pos {
908 return true;
909 }
910 }
911 self.pos = save_pos;
912 false
913 }
914
915 PatNode::ExcSync | PatNode::ExcEnd | PatNode::Back(_) | PatNode::CountStart => true,
916 }
917 }
918
919 fn current_char(&self) -> char {
920 self.input[self.pos..].chars().next().unwrap_or('\0')
921 }
922
923 fn match_single_node(&mut self, node: &PatNode) -> bool {
925 match node {
926 PatNode::Sequence(nodes) => self.match_nodes_at(nodes, 0),
927 _ => self.match_node(node),
928 }
929 }
930
931 fn match_number(&mut self, from: Option<i64>, to: Option<i64>) -> bool {
932 let start = self.pos;
933 let mut num_str = String::new();
934
935 while self.pos < self.input.len() {
937 let c = self.current_char();
938 if c.is_ascii_digit() {
939 num_str.push(c);
940 self.pos += 1;
941 } else {
942 break;
943 }
944 }
945
946 if num_str.is_empty() {
947 self.pos = start;
948 return false;
949 }
950
951 let num: i64 = match num_str.parse() {
952 Ok(n) => n,
953 Err(_) => {
954 self.pos = start;
955 return false;
956 }
957 };
958
959 let in_range = match (from, to) {
960 (Some(f), Some(t)) => num >= f && num <= t,
961 (Some(f), None) => num >= f,
962 (None, Some(t)) => num <= t,
963 (None, None) => true,
964 };
965
966 if !in_range {
967 self.pos = start;
968 return false;
969 }
970
971 true
972 }
973
974 pub fn captures(&self) -> &[(usize, usize); NSUBEXP] {
976 &self.captures
977 }
978
979 pub fn capture(&self, n: usize) -> Option<&'a str> {
981 if n == 0 || n > NSUBEXP {
982 return None;
983 }
984 if self.captures_set & (1 << (n - 1)) == 0 {
985 return None;
986 }
987 let (start, end) = self.captures[n - 1];
988 if start <= end && end <= self.input.len() {
989 Some(&self.input[start..end])
990 } else {
991 None
992 }
993 }
994}
995
996pub fn patcompile(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
998 PatCompiler::new(pattern, flags).compile()
999}
1000
1001pub fn patcompile_opts(
1003 pattern: &str,
1004 flags: PatFlags,
1005 extended_glob: bool,
1006 ksh_glob: bool,
1007 igncase: bool,
1008) -> Result<PatProg, String> {
1009 PatCompiler::new(pattern, flags)
1010 .with_options(extended_glob, ksh_glob)
1011 .with_igncase(igncase)
1012 .compile()
1013}
1014
1015pub fn pattry(prog: &PatProg, s: &str) -> bool {
1017 PatMatcher::new(prog, s).try_match()
1018}
1019
1020pub fn patmatch(pattern: &str, text: &str) -> bool {
1022 match patcompile(pattern, PatFlags::default()) {
1023 Ok(prog) => pattry(&prog, text),
1024 Err(_) => false,
1025 }
1026}
1027
1028pub fn patmatch_opts(
1030 pattern: &str,
1031 text: &str,
1032 extended_glob: bool,
1033 ksh_glob: bool,
1034 igncase: bool,
1035) -> bool {
1036 match patcompile_opts(
1037 pattern,
1038 PatFlags::default(),
1039 extended_glob,
1040 ksh_glob,
1041 igncase,
1042 ) {
1043 Ok(prog) => pattry(&prog, text),
1044 Err(_) => false,
1045 }
1046}
1047
1048pub fn patmatch_captures<'a>(prog: &'a PatProg, text: &'a str) -> Option<Vec<Option<&'a str>>> {
1050 let mut matcher = PatMatcher::new(prog, text);
1051 if matcher.try_match() {
1052 let mut captures = Vec::with_capacity(prog.npar);
1053 for i in 1..=prog.npar {
1054 captures.push(matcher.capture(i));
1055 }
1056 Some(captures)
1057 } else {
1058 None
1059 }
1060}
1061
1062pub fn pattrylen(prog: &PatProg, s: &str, len: usize) -> bool {
1064 let truncated = if len < s.len() { &s[..len] } else { s };
1065 pattry(prog, truncated)
1066}
1067
1068pub fn pattryrefs(prog: &PatProg, s: &str) -> Option<(bool, Vec<(usize, usize)>)> {
1070 let mut matcher = PatMatcher::new(prog, s);
1071 let matched = matcher.try_match();
1072 if matched {
1073 let refs: Vec<(usize, usize)> = (1..=prog.npar).map(|i| matcher.captures[i - 1]).collect();
1074 Some((true, refs))
1075 } else {
1076 Some((false, Vec::new()))
1077 }
1078}
1079
1080pub fn patmatchlen(prog: &PatProg, s: &str) -> Option<usize> {
1082 let mut matcher = PatMatcher::new(prog, s);
1083 if matcher.try_match() {
1084 Some(matcher.pos)
1085 } else {
1086 None
1087 }
1088}
1089
1090pub fn patgetglobflags(s: &str) -> Option<(GlobFlags, Option<PatOp>, usize)> {
1097 if !s.starts_with("(#") {
1098 return None;
1099 }
1100
1101 let mut flags = GlobFlags::default();
1102 let mut assert_op = None;
1103 let mut pos = 2; let bytes = s.as_bytes();
1105
1106 while pos < bytes.len() && bytes[pos] != b')' {
1107 match bytes[pos] {
1108 b'q' => {
1109 while pos < bytes.len() && bytes[pos] != b')' {
1111 pos += 1;
1112 }
1113 break;
1114 }
1115 b'a' => {
1116 pos += 1;
1118 let mut num_str = String::new();
1119 while pos < bytes.len() && bytes[pos].is_ascii_digit() {
1120 num_str.push(bytes[pos] as char);
1121 pos += 1;
1122 }
1123 flags.approx = num_str.parse().unwrap_or(1).min(254);
1124 continue; }
1126 b'l' => {
1127 flags.lcmatchuc = true;
1128 flags.igncase = false;
1129 }
1130 b'i' => {
1131 flags.igncase = true;
1132 flags.lcmatchuc = false;
1133 }
1134 b'I' => {
1135 flags.igncase = false;
1136 flags.lcmatchuc = false;
1137 }
1138 b'b' => {
1139 flags.backref = true;
1140 }
1141 b'B' => {
1142 flags.backref = false;
1143 }
1144 b'm' => {
1145 flags.matchref = true;
1146 }
1147 b'M' => {
1148 flags.matchref = false;
1149 }
1150 b's' => {
1151 assert_op = Some(PatOp::IsStart);
1152 }
1153 b'e' => {
1154 assert_op = Some(PatOp::IsEnd);
1155 }
1156 b'u' => {
1157 flags.multibyte = true;
1158 }
1159 b'U' => {
1160 flags.multibyte = false;
1161 }
1162 _ => return None,
1163 }
1164 pos += 1;
1165 }
1166
1167 if pos >= bytes.len() || bytes[pos] != b')' {
1168 return None;
1169 }
1170 pos += 1; if assert_op.is_some() && pos - 3 > 1 {
1174 return None;
1176 }
1177
1178 Some((flags, assert_op, pos))
1179}
1180
/// Tests whether `ch` occurs in `range`; with `igncase` both sides are
/// compared after ASCII lower-casing.
pub fn patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
    let fold = |c: char| if igncase { c.to_ascii_lowercase() } else { c };
    let wanted = fold(ch);
    range.iter().any(|&member| fold(member) == wanted)
}
1193
/// Returns the character at index `idx` of `range`, or `None` when the
/// index is out of bounds.
pub fn patmatchindex(range: &[char], idx: usize) -> Option<char> {
    match range.get(idx) {
        Some(&ch) => Some(ch),
        None => None,
    }
}
1198
/// Reports whether `s` contains any of the wildcard characters
/// `*`, `?`, `[`, `#`, `^`, `~`, `<` or `>`.
pub fn haswilds(s: &str) -> bool {
    s.chars()
        .any(|c| matches!(c, '*' | '?' | '[' | '#' | '^' | '~' | '<' | '>'))
}
1209
1210pub fn patrepeat(prog: &PatProg, s: &str, max: Option<usize>) -> usize {
1212 let mut matcher = PatMatcher::new(prog, s);
1213 let mut count = 0;
1214 loop {
1215 if let Some(m) = max {
1216 if count >= m {
1217 break;
1218 }
1219 }
1220 let save = matcher.pos;
1221 if !matcher.match_nodes_at(&prog.code, 0) {
1222 matcher.pos = save;
1223 break;
1224 }
1225 if matcher.pos == save {
1226 break; }
1228 count += 1;
1229 }
1230 count
1231}
1232
/// One entry of the pattern-scope stack.
#[derive(Debug, Default, Clone)]
pub struct PatternScope {
    /// Names recorded as disabled in this scope.
    pub disabled: Vec<String>,
}
1238
1239use std::sync::Mutex;
1240
/// Process-wide stack of pattern scopes; the last element is the current scope.
static PATTERN_SCOPES: Mutex<Vec<PatternScope>> = Mutex::new(Vec::new());
1242
1243pub fn startpatternscope() {
1245 PATTERN_SCOPES.lock().unwrap().push(PatternScope::default());
1246}
1247
1248pub fn endpatternscope() {
1250 PATTERN_SCOPES.lock().unwrap().pop();
1251}
1252
1253pub fn savepatterndisables() -> Vec<String> {
1255 PATTERN_SCOPES
1256 .lock()
1257 .unwrap()
1258 .last()
1259 .map(|s| s.disabled.clone())
1260 .unwrap_or_default()
1261}
1262
1263pub fn restorepatterndisables(disables: Vec<String>) {
1265 if let Some(scope) = PATTERN_SCOPES.lock().unwrap().last_mut() {
1266 scope.disabled = disables;
1267 }
1268}
1269
1270pub fn clearpatterndisables() {
1272 if let Some(scope) = PATTERN_SCOPES.lock().unwrap().last_mut() {
1273 scope.disabled.clear();
1274 }
1275}
1276
1277pub fn freepatprog(_prog: PatProg) {
1279 }
1281
/// Placeholder: ignores all arguments and returns 0.
pub fn pat_enables(cmd: &str, patterns: &[&str], enable: bool) -> i32 {
    let _unused = (cmd, patterns, enable);
    0
}
1288
/// Names usable inside `[:...:]`, in a fixed order; `range_type` returns an
/// index into this table and `pattern_range_to_string` maps an index back.
pub const COLON_CLASSES: &[&str] = &[
    "alpha",
    "alnum",
    "ascii",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "xdigit",
    "IDENT",
    "IFS",
    "IFSSPACE",
    "WORD",
    "INCOMPLETE",
    "INVALID",
];
1311
1312pub fn range_type(name: &str) -> Option<usize> {
1314 COLON_CLASSES.iter().position(|&c| c == name)
1315}
1316
1317pub fn pattern_range_to_string(range_type_idx: usize) -> Option<String> {
1319 COLON_CLASSES
1320 .get(range_type_idx)
1321 .map(|s| format!("[:{}:]", s))
1322}
1323
/// No-op placeholder; takes no arguments and does nothing.
pub fn clear_shiftstate() {}
1331
/// Returns the byte offset just past the character that starts at `pos`.
///
/// When no character starts at `pos` (end of string, offset past the end,
/// or an offset inside a multi-byte character) the result is `pos + 1`
/// instead of a slicing panic.
pub fn metacharinc(s: &str, pos: usize) -> usize {
    let width = s
        .get(pos..)
        .and_then(|rest| rest.chars().next())
        .map_or(1, char::len_utf8);
    pos + width
}
1337
1338pub fn patadd(prog: &mut Vec<PatNode>, node: PatNode) {
1340 prog.push(node);
1341}
1342
/// No-op placeholder; takes no arguments and does nothing.
pub fn patcompcharsset() {}
1345
/// No-op placeholder; takes no arguments and does nothing.
pub fn patcompstart() {}
1348
1349pub fn patcompswitch(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
1351 patcompile(pattern, flags)
1352}
1353
1354pub fn patcompbranch(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
1356 patcompile(pattern, flags)
1357}
1358
1359pub fn patcomppiece(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
1361 patcompile(pattern, flags)
1362}
1363
1364pub fn patcompnot(pattern: &str, flags: PatFlags) -> Result<PatProg, String> {
1366 let negated = format!("^({})", pattern);
1367 patcompile(&negated, flags)
1368}
1369
1370pub fn patnode(prog: &mut Vec<PatNode>, node: PatNode) -> usize {
1372 let idx = prog.len();
1373 prog.push(node);
1374 idx
1375}
1376
1377pub fn patinsert(prog: &mut Vec<PatNode>, pos: usize, node: PatNode) {
1379 if pos <= prog.len() {
1380 prog.insert(pos, node);
1381 }
1382}
1383
/// No-op placeholder; all arguments are ignored.
pub fn pattail(_prog: &[PatNode], _p: usize, _val: usize) {}
1386
/// No-op placeholder; all arguments are ignored.
pub fn patoptail(_prog: &[PatNode], _p: usize, _val: usize) {}
1389
/// Returns the character that starts at byte offset `pos` of `s`.
///
/// Returns `None` at the end of the string and, instead of panicking, also
/// when `pos` is past the end or inside a multi-byte character.
pub fn charref(s: &str, pos: usize) -> Option<char> {
    s.get(pos..)?.chars().next()
}
1394
1395pub fn charnext(s: &str, pos: usize) -> usize {
1397 metacharinc(s, pos)
1398}
1399
/// Returns the character that starts at `*pos` and advances `*pos` past it.
///
/// Returns `None` and leaves `*pos` untouched at the end of the string and,
/// instead of panicking, also when `*pos` is past the end or inside a
/// multi-byte character.
pub fn charrefinc(s: &str, pos: &mut usize) -> Option<char> {
    let c = s.get(*pos..)?.chars().next()?;
    *pos += c.len_utf8();
    Some(c)
}
1406
/// Returns the byte offset of the character that precedes `pos`.
///
/// `pos == 0` yields 0. An offset past the end is treated as the end of the
/// string, and an offset inside a multi-byte character yields the start of
/// that character; both cases previously panicked.
pub fn charsub(s: &str, pos: usize) -> usize {
    let mut prev = pos.min(s.len());
    if prev == 0 {
        return 0;
    }
    prev -= 1;
    // Walk back to the start of the character; offset 0 is always a boundary.
    while !s.is_char_boundary(prev) {
        prev -= 1;
    }
    prev
}
1419
/// No-op placeholder; takes no arguments and does nothing.
pub fn pattrystart() {}
1422
/// Returns an owned copy of `s`, unchanged.
pub fn patmungestring(s: &str) -> String {
    String::from(s)
}
1427
1428pub fn mb_patmatchrange(range: &[char], ch: char, igncase: bool) -> bool {
1430 patmatchrange(range, ch, igncase)
1431}
1432
1433pub fn mb_patmatchindex(range: &[char], idx: usize) -> Option<char> {
1435 patmatchindex(range, idx)
1436}
1437
/// Returns an owned copy of `s`.
pub fn patallocstr(s: &str) -> String {
    s.to_owned()
}
1442
// Unit tests for the pattern compiler, matcher and helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    // A pattern without pattern characters must equal the whole input.
    #[test]
    fn test_simple_literal() {
        assert!(patmatch("hello", "hello"));
        assert!(!patmatch("hello", "world"));
        assert!(!patmatch("hello", "hell"));
    }

    // `*` matches any run of characters, including the empty one.
    #[test]
    fn test_star() {
        assert!(patmatch("*", "anything"));
        assert!(patmatch("*", ""));
        assert!(patmatch("h*o", "hello"));
        assert!(patmatch("h*o", "ho"));
        assert!(!patmatch("h*o", "hi"));
    }

    // `?` matches exactly one character.
    #[test]
    fn test_question() {
        assert!(patmatch("?", "a"));
        assert!(!patmatch("?", "ab"));
        assert!(patmatch("h?llo", "hello"));
        assert!(patmatch("h?llo", "hallo"));
        assert!(!patmatch("h?llo", "hllo"));
    }

    // Bracket sets and ranges.
    #[test]
    fn test_bracket() {
        assert!(patmatch("[abc]", "a"));
        assert!(patmatch("[abc]", "b"));
        assert!(!patmatch("[abc]", "d"));
        assert!(patmatch("[a-z]", "m"));
        assert!(!patmatch("[a-z]", "5"));
    }

    // Both `!` and `^` negate a bracket set.
    #[test]
    fn test_bracket_negated() {
        assert!(!patmatch("[!abc]", "a"));
        assert!(patmatch("[!abc]", "d"));
        assert!(patmatch("[^abc]", "x"));
    }

    // A backslash makes the next character literal.
    #[test]
    fn test_escape() {
        assert!(patmatch("\\*", "*"));
        assert!(!patmatch("\\*", "a"));
        assert!(patmatch("\\?", "?"));
    }

    // `<from-to>` bounds are inclusive.
    #[test]
    fn test_numeric_range() {
        assert!(patmatch("<1-10>", "5"));
        assert!(patmatch("<1-10>", "1"));
        assert!(patmatch("<1-10>", "10"));
        assert!(!patmatch("<1-10>", "0"));
        assert!(!patmatch("<1-10>", "11"));
    }

    // The last argument of `patmatch_opts` switches case-insensitivity.
    #[test]
    fn test_case_insensitive() {
        assert!(patmatch_opts("Hello", "HELLO", true, true, true));
        assert!(patmatch_opts("Hello", "hello", true, true, true));
        assert!(!patmatch_opts("Hello", "HELLO", true, true, false));
    }

    // `x#` matches zero or more occurrences of `x`.
    #[test]
    fn test_extended_hash() {
        assert!(patmatch("a#", ""));
        assert!(patmatch("a#", "a"));
        assert!(patmatch("a#", "aaa"));
    }

    // Parenthesised groups are numbered from 1 in order of appearance.
    #[test]
    fn test_captures() {
        let prog = patcompile("(foo)(bar)", PatFlags::default()).unwrap();
        let captures = patmatch_captures(&prog, "foobar").unwrap();
        assert_eq!(captures.len(), 2);
        assert_eq!(captures[0], Some("foo"));
        assert_eq!(captures[1], Some("bar"));
    }

    // `[:name:]` classes inside a bracket expression.
    #[test]
    fn test_posix_class() {
        assert!(patmatch("[[:alpha:]]", "a"));
        assert!(patmatch("[[:alpha:]]", "Z"));
        assert!(!patmatch("[[:alpha:]]", "5"));
        assert!(patmatch("[[:digit:]]", "5"));
        assert!(!patmatch("[[:digit:]]", "a"));
    }

    // Literal patterns are flagged as pure strings by the compiler.
    #[test]
    fn test_pure_string_optimization() {
        let prog = patcompile("hello", PatFlags::default()).unwrap();
        assert!(prog.flags.pures);
        assert!(prog.pure_string.is_some());
    }

    // `+(...)` requires at least one occurrence.
    #[test]
    fn test_ksh_glob_plus() {
        assert!(patmatch("+(ab)", "ab"));
        assert!(patmatch("+(ab)", "abab"));
        assert!(!patmatch("+(ab)", ""));
    }

    // `*(...)` allows zero or more occurrences.
    #[test]
    fn test_ksh_glob_star() {
        assert!(patmatch("*(ab)", ""));
        assert!(patmatch("*(ab)", "ab"));
        assert!(patmatch("*(ab)", "ababab"));
    }

    // `?(...)` allows zero or one occurrence.
    #[test]
    fn test_ksh_glob_question() {
        assert!(patmatch("?(ab)c", "c"));
        assert!(patmatch("?(ab)c", "abc"));
    }

    // `pattrylen` matches against a truncated input.
    #[test]
    fn test_pattrylen() {
        let prog = patcompile("hello", PatFlags::default()).unwrap();
        assert!(pattrylen(&prog, "hello world", 5));
        assert!(!pattrylen(&prog, "hello world", 3));
    }

    // With `noanch` the pattern does not have to consume the whole input.
    #[test]
    fn test_patmatchlen() {
        let prog = patcompile(
            "hel*",
            PatFlags {
                noanch: true,
                ..Default::default()
            },
        )
        .unwrap();
        let len = patmatchlen(&prog, "hello world");
        assert!(len.is_some());
    }

    // `(#...)` flag groups: flags, assertions and consumed length.
    #[test]
    fn test_patgetglobflags() {
        let (flags, assert_op, consumed) = patgetglobflags("(#i)rest").unwrap();
        assert!(flags.igncase);
        assert!(assert_op.is_none());
        assert_eq!(consumed, 4);

        let (flags, _, _) = patgetglobflags("(#l)rest").unwrap();
        assert!(flags.lcmatchuc);
        assert!(!flags.igncase);

        let (_, assert_op, _) = patgetglobflags("(#s)rest").unwrap();
        assert_eq!(assert_op, Some(PatOp::IsStart));

        let (flags, _, _) = patgetglobflags("(#bm)rest").unwrap();
        assert!(flags.backref);
        assert!(flags.matchref);
    }

    // Wildcard detection on raw strings.
    #[test]
    fn test_haswilds() {
        assert!(haswilds("*.txt"));
        assert!(haswilds("file?"));
        assert!(haswilds("[abc]"));
        assert!(haswilds("foo#"));
        assert!(!haswilds("plain"));
    }

    // Set membership, with and without case folding.
    #[test]
    fn test_patmatchrange() {
        let range = vec!['a', 'b', 'c'];
        assert!(patmatchrange(&range, 'a', false));
        assert!(!patmatchrange(&range, 'd', false));
        assert!(patmatchrange(&range, 'A', true));
    }

    // Class names map to their index in COLON_CLASSES.
    #[test]
    fn test_range_type() {
        assert_eq!(range_type("alpha"), Some(0));
        assert_eq!(range_type("digit"), Some(5));
        assert_eq!(range_type("nonexistent"), None);
    }

    // Indices map back to the `[:name:]` spelling.
    #[test]
    fn test_pattern_range_to_string() {
        assert_eq!(pattern_range_to_string(0), Some("[:alpha:]".to_string()));
        assert_eq!(pattern_range_to_string(5), Some("[:digit:]".to_string()));
    }
}