1use crate::vval::{VVal, Env, VValFun, StackAction};
8
9use crate::parser::state::State;
10use crate::parser::state::{ParseError, ParseErrorKind};
11use crate::parser::{parse_str_backslash, EscSeqValue};
12
13use std::rc::Rc;
14use std::cell::RefCell;
15
16use crate::str_int::s2sym;
17
/// Tells whether `c` may appear unescaped inside an identifier while the
/// parser is in pattern identifier mode.
///
/// Whitespace and the characters `? | [ ] ( ) $ *` are rejected; everything
/// else is accepted.
fn is_ident_pattern_char(c: char) -> bool {
    let is_meta = matches!(c, '?' | '|' | '[' | ']' | '(' | ')' | '$' | '*');
    !is_meta && !c.is_whitespace()
}
25
/// Tells whether `c` may appear unescaped inside an identifier while the
/// parser is *not* in pattern identifier mode (i.e. inside a selector).
///
/// Whitespace and the selector punctuation characters listed below are
/// rejected; everything else is accepted.
fn is_ident_selector_char(c: char) -> bool {
    let is_meta = matches!(
        c,
        '!' | '?' | '/' | '\\' | '|' | '{' | '}'
            | '[' | ']' | '(' | ')' | '\'' | '^'
            | '&' | '$' | ':' | ';' | '*' | '=' | ','
    );
    !is_meta && !c.is_whitespace()
}
35
36fn parse_ident_char(ps: &mut State) -> Result<Option<char>, ParseError> {
37 if let Some(c) = ps.peek() {
38 match c {
39 '\\' => {
40 ps.consume();
41 let c =
42 match parse_str_backslash(ps)? {
43 EscSeqValue::Char(c) => c,
44 EscSeqValue::Byte(b) => b as char,
45 };
46 ps.skip_ws();
47 Ok(Some(c))
48 },
49 c if ps.is_pattern_ident_mode() && is_ident_pattern_char(c) => {
50 ps.consume_ws();
51 Ok(Some(c))
52 },
53 c if !ps.is_pattern_ident_mode() && is_ident_selector_char(c) => {
54 ps.consume_ws();
55 Ok(Some(c))
56 },
57 _ => Ok(None),
58 }
59 } else {
60 Ok(None)
61 }
62}
63
64fn parse_ident(ps: &mut State, one_char: bool) -> Result<VVal, ParseError> {
65 let mut uh = String::new();
66
67 if one_char {
68 if let Some(c) = parse_ident_char(ps)? {
69 uh.push(c);
70 }
71 } else {
72 while let Some(c) = parse_ident_char(ps)? {
73 uh.push(c);
74 }
75 }
76
77 if uh.is_empty() {
78 return Err(ps.err(
79 ParseErrorKind::UnexpectedToken(
80 ps.peek().unwrap_or(' '),
81 "identifier character")));
82 }
83
84 let r = VVal::pair(VVal::new_sym("I"), VVal::new_sym(&uh));
85
86 Ok(r)
87}
88
89fn parse_glob_cclass(ps: &mut State) -> Result<VVal, ParseError> {
90 if !ps.consume_if_eq('[') {
91 return Err(ps.err(
92 ParseErrorKind::ExpectedToken('[', "char class start")));
93 }
94
95 let neg = ps.consume_if_eq('^');
96
97 let mut chars = String::new();
98
99 let mut c = ps.expect_some(ps.peek())?;
100 let mut last : Option<char> = None;
101 let mut find_range_end = false;
102
103 while c != ']' {
104 ps.consume();
105
106 if last.is_some() && c == '-' {
107 find_range_end = true;
108 c = ps.expect_some(ps.peek())?;
109 continue;
110 }
111
112 c =
113 if c == '\\' {
114 match parse_str_backslash(ps)? {
115 EscSeqValue::Char(c) => c,
116 EscSeqValue::Byte(b) => b as char,
117 }
118 } else {
119 c
120 };
121
122 if find_range_end {
123 let start = last.unwrap() as u32 + 1;
124 let end = c as u32;
125 for c_idx in start..end {
126 if let Ok(c) = char::try_from(c_idx) {
127 chars.push(c);
128 }
129 }
130 chars.push(c);
131
132 last = None;
133 find_range_end = false;
134 } else {
135 chars.push(c);
136 last = Some(c);
137 }
138
139 c = ps.expect_some(ps.peek())?;
140 }
141
142 if find_range_end {
143 chars.push('-');
144 }
145
146 if !ps.consume_if_eq_ws(']') {
147 return Err(ps.err(
148 ParseErrorKind::ExpectedToken(']', "char class end")));
149 }
150
151 Ok(VVal::pair(
152 VVal::new_sym(
153 if neg { "NCCls" }
154 else { "CCls" }),
155 VVal::new_str_mv(chars)))
156}
157
158fn parse_pat_regex(ps: &mut State) -> Result<VVal, ParseError> {
159 match ps.expect_some(ps.peek())? {
160 '*' => {
161 ps.consume_ws();
162 Ok(VVal::pair(VVal::new_sym("N0"), parse_rx_atom(ps)?))
163 },
164 '+' => {
165 ps.consume_ws();
166 Ok(VVal::pair(VVal::new_sym("N1"), parse_rx_atom(ps)?))
167 },
168 '<' => {
169 ps.consume_ws();
170 match ps.expect_some(ps.peek())? {
171 '*' => {
172 ps.consume_ws();
173 Ok(VVal::pair(VVal::new_sym("N0-"), parse_rx_atom(ps)?))
174 },
175 '+' => {
176 ps.consume_ws();
177 Ok(VVal::pair(VVal::new_sym("N1-"), parse_rx_atom(ps)?))
178 },
179 '?' => {
180 ps.consume_ws();
181 Ok(VVal::pair(VVal::new_sym("Opt-"), parse_rx_atom(ps)?))
182 },
183 c =>
184 Err(ps.err(
185 ParseErrorKind::UnexpectedToken(
186 c, "non-greedy regex pattern"))),
187 }
188 }
189 '?' => {
190 ps.consume_ws();
191 Ok(VVal::pair(VVal::new_sym("Opt"), parse_rx_atom(ps)?))
192 },
193 '!' => {
194 ps.consume_ws();
195 Ok(VVal::pair(VVal::new_sym("ZwNegLA"), parse_rx_atom(ps)?))
196 },
197 '=' => {
198 ps.consume_ws();
199 Ok(VVal::pair(VVal::new_sym("ZwLA"), parse_rx_atom(ps)?))
200 },
201 '^' => { ps.consume_ws(); Ok(VVal::new_sym("Start")) },
202 '$' => { ps.consume_ws(); Ok(VVal::new_sym("End")) },
203 '&' => { ps.consume_ws();
204 match ps.expect_some(ps.peek())? {
205 'L' => { ps.consume_ws(); Ok(VVal::new_sym("ToLowercase")) },
206 'U' => { ps.consume_ws(); Ok(VVal::new_sym("ToUppercase")) },
207 c =>
208 Err(ps.err(
209 ParseErrorKind::UnexpectedToken(c, "match modifier"))),
210 }
211 },
212 's' => { ps.consume_ws(); Ok(VVal::new_sym("WsChar")) },
213 'S' => { ps.consume_ws(); Ok(VVal::new_sym("NWsChar")) },
214 c =>
215 Err(ps.err(
216 ParseErrorKind::UnexpectedToken(c, "regex pattern"))),
217 }
218}
219
220fn parse_glob_group(ps: &mut State) -> Result<VVal, ParseError> {
221 if !ps.consume_if_eq_ws('(') {
222 return Err(ps.err(
223 ParseErrorKind::ExpectedToken('(', "sub pattern start")));
224 }
225
226 let capture = ps.consume_if_eq_ws('^');
227 let p = parse_pattern(ps)?;
228
229 if !ps.consume_if_eq_ws(')') {
230 return Err(ps.err(
231 ParseErrorKind::ExpectedToken(')', "sub pattern end")));
232 }
233
234 if capture {
235 Ok(VVal::pair(VVal::new_sym("PatCap"), p))
236 } else {
237 Ok(VVal::pair(VVal::new_sym("PatSub"), p))
238 }
239}
240
241fn parse_pat_glob(ps: &mut State) -> Result<VVal, ParseError> {
242 match ps.expect_some(ps.peek())? {
243 '*' => { ps.consume_ws(); Ok(VVal::new_sym("Glob")) },
244 '?' => { ps.consume_ws(); Ok(VVal::new_sym("Any")) },
245 '$' => { ps.consume_ws(); parse_pat_regex(ps) },
246 '[' => parse_glob_cclass(ps),
247 '(' => parse_glob_group(ps),
248 c =>
249 Err(ps.err(
250 ParseErrorKind::UnexpectedToken(c, "in glob pattern"))),
251 }
252}
253
254fn parse_rx_atom(ps: &mut State) -> Result<VVal, ParseError> {
255 match ps.expect_some(ps.peek())? {
256 '*' | '?' | '[' | '(' | '$'
257 => parse_pat_glob(ps),
258 _ => parse_ident(ps, true)
259 }
260}
261
262fn parse_glob_atom(ps: &mut State) -> Result<VVal, ParseError> {
263 match ps.expect_some(ps.peek())? {
264 '*' | '?' | '[' | '(' | '$'
265 => parse_pat_glob(ps),
266 _ => parse_ident(ps, false)
267 }
268}
269
270fn parse_pat_branch(ps: &mut State) -> Result<VVal, ParseError> {
271 let pat_branch = VVal::vec();
272
273 while !ps.at_end()
274 && ( (ps.is_pattern_ident_mode() && !ps.lookahead_one_of("|)]"))
275 || (!ps.is_pattern_ident_mode() && !ps.lookahead_one_of("|:&=)]}/,")))
276 {
277 pat_branch.push(parse_glob_atom(ps)?);
279 }
280
281 Ok(pat_branch)
282}
283
284pub fn parse_pattern(ps: &mut State) -> Result<VVal, ParseError> {
287 let mut pat = parse_pat_branch(ps)?;
288
289 if ps.consume_if_eq_ws('|') {
290 let pat_alt = parse_pattern(ps)?;
291 pat = VVal::vec3(VVal::new_sym("Alt"), pat, pat_alt);
292 }
293
294 Ok(pat)
295}
296
297fn parse_index(ps: &mut State) -> Result<VVal, ParseError> {
298 let uh = ps.take_while(|c| c.is_digit(10));
299
300 if let Ok(cn) = uh.to_string().parse::<i64>() {
301 ps.skip_ws();
302 Ok(VVal::Int(cn as i64))
303 } else {
304 Err(ps.err(ParseErrorKind::BadEscape("Bad number as index")))
305 }
306}
307
308fn parse_key(ps: &mut State) -> Result<VVal, ParseError> {
309 match ps.expect_some(ps.peek())? {
310 '0' | '1' | '2' | '3' | '4'
311 | '5' | '6' | '7' | '8' | '9'
312 => parse_index(ps),
313 _ => parse_pattern(ps),
314 }
315}
316
317fn parse_kv(ps: &mut State) -> Result<VVal, ParseError> {
318 let key = parse_key(ps)?;
319
320 if !ps.consume_if_eq_ws('=') {
321 return Err(ps.err(
322 ParseErrorKind::ExpectedToken('=', "key/value pattern")));
323 }
324
325 let val = parse_pattern(ps)?;
326
327 Ok(VVal::vec2(key, val))
328}
329
330fn parse_kv_item(ps: &mut State) -> Result<VVal, ParseError> {
331 if !ps.consume_if_eq_ws('{') {
332 return Err(ps.err(
333 ParseErrorKind::ExpectedToken('{', "key/value node pattern start")));
334 }
335
336 let kv = parse_kv(ps)?;
337
338 let v = VVal::vec2(VVal::new_sym("KV"), kv);
339
340 while ps.expect_some(ps.peek())? == ',' {
341 ps.consume_ws();
342 let kv = parse_kv(ps)?;
343 v.push(kv);
344 }
345
346 if !ps.consume_if_eq_ws('}') {
347 return Err(ps.err(
348 ParseErrorKind::ExpectedToken('}', "in key/value node pattern end")));
349 }
350
351 Ok(v)
352}
353
354fn parse_node_match(ps: &mut State) -> Result<VVal, ParseError> {
355 if !ps.consume_if_eq_ws(':') {
356 return Err(ps.err(
357 ParseErrorKind::ExpectedToken(
358 ':', "start of node match")));
359 }
360
361 let negated = ps.consume_if_eq_ws('!');
362
363 let mut ret =
364 match ps.expect_some(ps.peek())? {
365 '(' => {
366 ps.consume_ws();
367 let ret =
368 VVal::vec2(
369 VVal::new_sym("LA"),
370 parse_selector_pattern(ps)?);
371
372 if !ps.consume_if_eq_ws(')') {
373 return Err(ps.err(
374 ParseErrorKind::ExpectedToken(
375 ')', "at end of look-ahead selector")));
376 }
377
378 ret
379 },
380 's' if ps.consume_lookahead("str") => {
381 ps.skip_ws();
382 if !ps.consume_if_eq_ws('=') {
383 return Err(ps.err(
384 ParseErrorKind::ExpectedToken('=', "str node match")));
385 }
386
387 VVal::vec2(
388 VVal::new_sym("Str"),
389 parse_pattern(ps)?)
390 },
391 't' if ps.consume_lookahead("type") => {
392 ps.skip_ws();
393 if !ps.consume_if_eq_ws('=') {
394 return Err(ps.err(
395 ParseErrorKind::ExpectedToken('=', "type node match")));
396 }
397
398 VVal::vec2(
399 VVal::new_sym("Type"),
400 parse_pattern(ps)?)
401 },
402 '{' => parse_kv_item(ps)?,
403 c => {
404 return Err(ps.err(
405 ParseErrorKind::UnexpectedToken(c, "node match")));
406 }
407 };
408
409 if negated {
410 ret = VVal::vec2(VVal::new_sym("Not"), ret);
411 }
412
413 Ok(ret)
414}
415
416fn parse_node_cond(ps: &mut State) -> Result<VVal, ParseError> {
417 let nm = parse_node_match(ps)?;
418
419 match ps.peek().unwrap_or('\0') {
420 '&' => {
421 ps.consume_ws();
422
423 let v = VVal::vec();
424 v.push(VVal::new_sym("And"));
425 v.push(nm);
426 v.push(parse_node_cond(ps)?);
427 Ok(v)
428 },
429 '|' => {
430 ps.consume_ws();
431
432 let v = VVal::vec();
433 v.push(VVal::new_sym("Or"));
434 v.push(nm);
435 v.push(parse_node_cond(ps)?);
436 Ok(v)
437 },
438 _ => Ok(nm),
439 }
440}
441
442fn parse_rec_cond(ps: &mut State) -> Result<VVal, ParseError> {
443 if ps.consume_lookahead("key") {
444 ps.skip_ws();
445 if ps.consume_if_eq_ws('=') {
446 ps.skip_ws();
447 Ok(VVal::vec2(VVal::new_sym("NotKey"), parse_pattern(ps)?))
448
449 } else {
450 Err(ps.err(
451 ParseErrorKind::ExpectedToken(
452 '=', "`key =` in recursion condition")))
453 }
454 } else {
455 Err(ps.err(
456 ParseErrorKind::UnexpectedToken(
457 ps.peek().unwrap(), "recursion condition")))
458 }
459}
460
461fn parse_node(ps: &mut State) -> Result<VVal, ParseError> {
462 let c = ps.expect_some(ps.peek())?;
463
464 match c {
465 '*' if ps.consume_lookahead("**") => {
466 ps.skip_ws();
467
468 let rec_cond =
469 if ps.peek().unwrap_or('\0') == '!' {
470 ps.consume_ws();
471 parse_rec_cond(ps)?
472 } else {
473 VVal::None
474 };
475
476 let recval_cond =
477 if ps.peek().unwrap_or('\0') == '=' {
478 ps.consume_ws();
479 parse_node_cond(ps)?
480 } else {
481 VVal::None
482 };
483
484 let rg =
485 if ps.peek().unwrap_or('\0') == ':' {
486 VVal::vec3(VVal::new_sym("RecGlob"), rec_cond, parse_node_cond(ps)?)
487 } else {
488 VVal::vec3(VVal::new_sym("RecGlob"), rec_cond, VVal::None)
489 };
490
491 rg.push(recval_cond);
492
493 Ok(rg)
494 },
495 '^' => {
496 ps.consume_ws();
497 Ok(VVal::vec2(
498 VVal::new_sym("NCap"),
499 parse_node(ps)?))
500 },
501 _ => {
502 let key = parse_key(ps)?;
503 if ps.peek().unwrap_or('\0') == ':' {
504 Ok(VVal::vec3(
505 VVal::new_sym("NK"), key, parse_node_cond(ps)?))
506 } else {
507 Ok(VVal::vec2(
508 VVal::new_sym("NK"), key))
509 }
510 }
511 }
512}
513
514fn parse_selector_pattern(ps: &mut State) -> Result<VVal, ParseError> {
515 let selector = VVal::vec1(VVal::new_sym("Path"));
516
517 let node = parse_node(ps)?;
518 selector.push(node);
519
520 while ps.consume_if_eq_ws('/') {
521 let node = parse_node(ps)?;
523 selector.push(node);
524 }
525
526 Ok(selector)
527}
528
529fn parse_selector(s: &str) -> Result<VVal, ParseError> {
530 let mut ps = State::new(s, "<selector>");
531 ps.skip_ws();
532
533 let ret = parse_selector_pattern(&mut ps)?;
534
535 ps.skip_ws();
536
537 if !ps.at_end() {
538 return Err(ps.err(
539 ParseErrorKind::UnexpectedToken(
540 ps.peek().unwrap(), "end of selector")));
541 }
542
543 Ok(ret)
544}
545
546#[derive(Debug, Clone)]
548pub(crate) struct SelectorState {
549 orig_string_len: usize,
550 captures: Vec<(usize, usize)>,
551 selector_captures: Vec<VVal>,
552}
553
554impl SelectorState {
555 fn new() -> Self {
556 Self {
557 orig_string_len: 0,
558 captures: Vec::new(),
559 selector_captures: Vec::new(),
560 }
561 }
562
563 fn push_sel_capture(&mut self, v: VVal) {
564 self.selector_captures.push(v);
565 }
566
567 fn has_captures(&self) -> bool { !self.selector_captures.is_empty() }
568
569 fn get_sel_captures(&self) -> VVal {
570 VVal::vec_from(&self.selector_captures[..])
571 }
572
573 fn pop_sel_caputure(&mut self) {
574 self.selector_captures.pop();
575 }
576
577 fn push_capture_start(&mut self, s: &RxBuf) -> usize {
578 self.captures.push((s.offs, 0));
580 self.captures.len() - 1
581 }
582
583 fn set_capture_end(&mut self, idx: usize, s: &RxBuf) -> (usize, (usize, usize)) {
584 let offs = self.captures[idx].0;
586 self.captures[idx].1 = s.offs - offs;
587 (idx, self.captures[idx])
588 }
589
590 fn pop_capture(&mut self) {
591 self.captures.pop();
593 }
594
595 fn set_str(&mut self, s: &str) -> usize {
596 std::mem::replace(&mut self.orig_string_len, s.len())
597 }
598
599 fn restore_str(&mut self, os_len: usize) {
600 self.orig_string_len = os_len;
601 }
602}
603
/// A cheap, copyable view onto the not yet matched rest of an input string.
#[derive(Debug, Clone, Copy)]
pub(crate) struct RxBuf<'a> {
    /// The remaining input.
    s: &'a str,
    /// Byte offset of `s` relative to the start of the original input.
    offs: usize,
    /// Byte length of the original input.
    orig_len: usize,
}

impl std::fmt::Display for RxBuf<'_> {
    /// Formats the buffer as `<rest>[<offset>,<original length>]`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let RxBuf { s, offs, orig_len } = self;
        write!(f, "{}[{},{}]", s, offs, orig_len)
    }
}

impl<'a> RxBuf<'a> {
    /// Creates a buffer that covers all of `s`.
    fn new(s: &'a str) -> Self {
        Self {
            s,
            offs: 0,
            orig_len: s.len(),
        }
    }

    /// Returns `true` if nothing of the original input was skipped yet.
    fn is_at_start(&self) -> bool {
        0 == self.offs
    }

    /// Returns `true` if the offset has reached the original input length.
    fn is_at_end(&self) -> bool {
        self.orig_len == self.offs
    }

    /// Returns a buffer advanced by `offs` bytes.
    ///
    /// Panics (through the string slicing) if `offs` is out of bounds or
    /// not on a character boundary of the remaining input.
    fn offs(&self, offs: usize) -> Self {
        Self {
            s: &self.s[offs..],
            offs: self.offs + offs,
            ..*self
        }
    }
}
643
/// One captured span of the input. Further captures that were recorded for
/// the same capture index are linked through `next`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub(crate) struct CaptureNode {
    /// Byte offset of the captured span in the input.
    idx: usize,
    /// Byte length of the captured span.
    len: usize,
    /// The next capture in the list, if any.
    next: Option<Box<CaptureNode>>,
}

impl CaptureNode {
    /// Returns the text of `input` that this node (and only this node)
    /// covers.
    fn to_string(&self, input: &str) -> String {
        let end = self.idx + self.len;
        input[self.idx..end].to_string()
    }

    /// Returns the text of this node followed by the texts of all linked
    /// nodes, joined by `/`.
    #[allow(dead_code)]
    fn to_test_string(&self, input: &str) -> String {
        let mut out = self.to_string(input);

        let mut cursor = self.next.as_deref();
        while let Some(node) = cursor {
            out.push('/');
            out.push_str(&node.to_string(input));
            cursor = node.next.as_deref();
        }

        out
    }
}
677
678fn append_capture(cap_idx: usize, v: &mut Vec<Option<Box<CaptureNode>>>, cap: &(usize, usize)) {
679 let pos = v.get_mut(cap_idx).unwrap();
680 let tail = pos.take();
681 *pos = Some(Box::new(CaptureNode {
682 idx: cap.0,
683 len: cap.1,
684 next: tail
685 }));
686}
687
688#[derive(Debug, Clone)]
691pub(crate) struct PatResult {
692 matched: bool,
693 match_len: usize,
694 offs: usize,
695 captures: Option<Vec<Option<Box<CaptureNode>>>>,
696}
697
698impl PatResult {
699 fn matched() -> Self {
700 Self {
701 matched: true,
702 match_len: 0,
703 offs: 0,
704 captures: None,
705 }
706 }
707
708 pub fn len(mut self, l: usize) -> Self {
709 self.match_len += l;
710 self
711 }
712
713 pub fn b(&self) -> bool { self.matched }
714
715 fn add_capture(&mut self, cap: (usize, (usize, usize))) {
727 if self.captures.is_none() {
728 self.captures = Some(vec![]);
729 }
730 if let Some(c) = &mut self.captures {
731 let idx = cap.0;
732 let cap = cap.1;
733 if idx >= c.len() {
734 c.resize(idx + 1, None);
735 }
736 append_capture(idx, c, &cap);
737 }
738 }
739
740 fn capture(mut self, cap: (usize, (usize, usize))) -> Self {
741 self.add_capture(cap);
742 self
743 }
744
745 fn fail() -> Self {
746 Self {
747 matched: false,
748 match_len: 0,
749 offs: 0,
750 captures: None,
751 }
752 }
753
754 pub fn pos(&self) -> Option<(usize, usize)> {
755 if !self.matched {
756 return None;
757 }
758
759 Some((self.offs, self.match_len))
760 }
761
762 pub fn to_vval(&self, input: &str) -> VVal {
763 if !self.matched { return VVal::None; }
764
765 let v = VVal::vec();
766 v.push(VVal::new_str_mv(
767 input[self.offs..(self.offs + self.match_len)].to_string()));
768
769 if let Some(c) = &self.captures {
770 for cap in c.iter() {
771 if let Some(cap) = cap {
772 v.push(VVal::new_str_mv(cap.to_string(input)));
773 } else {
774 v.push(VVal::None);
775 }
776 }
777 }
778
779 v
780 }
781
782 #[allow(dead_code)]
783 pub fn to_test_string(&self, input: &str) -> String {
784 if !self.matched { return "-nomatch-".to_string() }
785
786 let mut s = input[self.offs..(self.offs + self.match_len)].to_string();
789
790 if let Some(c) = &self.captures {
791 for cap in c.iter() {
792 if let Some(cap) = cap {
793 s += "-";
794 s += &cap.to_test_string(input);
795 } else {
796 s += "$n";
797 }
798 }
799 }
800
801 s
802 }
803}
804
/// A compiled string pattern step: called with the remaining input buffer
/// and the shared matcher state, it returns the result of matching from
/// that position on.
pub(crate) type PatternNode = Box<dyn Fn(RxBuf, &mut SelectorState) -> PatResult>;
/// A compiled selector step: called with the value to inspect, the shared
/// matcher state and a third value (named `capts` at the use sites visible
/// here; NOTE(review): presumably the capture output, confirm against
/// `compile_selector`), it returns whether the selector matched.
pub(crate) type SelNode = Box<dyn Fn(&VVal, &mut SelectorState, &VVal) -> bool>;
809
/// Runs the block `$b` once for every prefix length of `$s.s` that lies on
/// a character boundary, from the full byte length down to 0.
///
/// The current length is available inside the block as `$try_len`. The
/// block typically leaves early via `return`.
macro_rules! while_shorten_str {
    ($s: ident, $try_len: ident, $b: block) => {
        let mut $try_len = $s.s.len();

        loop {
            $b;

            if $try_len == 0 {
                break;
            }

            // Step back to the previous character boundary.
            $try_len -= 1;
            while $try_len > 0 && !$s.s.is_char_boundary($try_len) {
                $try_len -= 1;
            }
        }
    }
}
828
/// Compiles the single pattern atom `p` into a matcher closure.
///
/// The returned closure is called with the remaining input. In general, if
/// the atom matches at the start of that input, the closure calls `next`
/// (the continuation for the rest of the pattern) on what is left and adds
/// the number of bytes the atom consumed to the length of the result.
///
/// Atoms are either pairs `(type symbol, value)` or plain symbols, as
/// produced by the pattern parser in this file.
///
/// # Panics
///
/// Panics on atoms with an unknown type, and if a pair has no first element.
fn compile_atom(p: &VVal, next: PatternNode) -> PatternNode {
    if p.is_pair() {
        let pair_type = p.at(0).unwrap().to_sym();
        let pair_val = p.at(1).unwrap_or(VVal::None);

        if pair_type == s2sym("I") {
            // Literal text: the input must start with it.
            let key_str = pair_val.clone();

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                key_str.with_s_ref(|y| {
                    let y_len = y.len();

                    if s.s.starts_with(y) {
                        (*next)(s.offs(y_len), st).len(y_len)
                    } else {
                        PatResult::fail()
                    }
                })
            })

        } else if pair_type == s2sym("CCls") {
            // Character class: the next character must be one of `chars`.
            let chars = pair_val.clone();

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                chars.with_s_ref(|chrs| {
                    if let Some(c) = s.s.chars().next() {
                        let c_len = c.len_utf8();

                        for mc in chrs.chars() {
                            if c == mc {
                                return (*next)(s.offs(c_len), st).len(c_len);
                            }
                        }
                    }

                    PatResult::fail()
                })
            })

        } else if pair_type == s2sym("NCCls") {
            // Negated character class: there must be a next character and it
            // must not be one of `chars`.
            let chars = pair_val.clone();

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                chars.with_s_ref(|chrs| {
                    if let Some(c) = s.s.chars().next() {
                        for mc in chrs.chars() {
                            if c == mc {
                                return PatResult::fail();
                            }
                        }

                        let c_len = c.len_utf8();
                        (*next)(s.offs(c_len), st).len(c_len)
                    } else {
                        PatResult::fail()
                    }
                })
            })

        } else if pair_type == s2sym("PatSub") {
            // Non-capturing group: compile the sub pattern with the same
            // continuation.
            compile_pattern(&pair_val, next)

        } else if pair_type == s2sym("PatCap") {
            // Capturing group. The index of the capture on the state's
            // capture stack is only known when matching starts, so it is
            // handed from the outer closure to the continuation of the sub
            // pattern through a shared cell.
            let cap_idx = Rc::new(RefCell::new(0));
            let cap_idx_b = cap_idx.clone();

            let sub =
                compile_pattern(&pair_val,
                    Box::new(move |s: RxBuf, st: &mut SelectorState| {
                        // The sub pattern matched up to `s`: close the
                        // capture and attach it to the result of the rest.
                        let cap = st.set_capture_end(*cap_idx.borrow(), &s);
                        (*next)(s, st).capture(cap)
                    }));

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                (*cap_idx_b.borrow_mut()) = st.push_capture_start(&s);
                let res = (*sub)(s, st);
                st.pop_capture();
                res
            })

        } else if pair_type == s2sym("ZwNegLA") {
            // Zero width negative look-ahead: the sub atom is tried with a
            // continuation that always succeeds. If it matches the whole
            // atom fails, otherwise `next` runs on the unchanged input.
            let sub_pat =
                compile_atom(&pair_val,
                    Box::new(move |_s: RxBuf, _st: &mut SelectorState|
                        PatResult::matched()));

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                let res = (*sub_pat)(s, st);
                if res.b() {
                    return PatResult::fail();
                }

                (*next)(s, st)
            })

        } else if pair_type == s2sym("ZwLA") {
            // Zero width positive look-ahead: like above, but the sub atom
            // is required to match.
            let sub_pat =
                compile_atom(&pair_val,
                    Box::new(move |_s: RxBuf, _st: &mut SelectorState|
                        PatResult::matched()));

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                let res = (*sub_pat)(s, st);
                if !res.b() {
                    return PatResult::fail();
                }

                (*next)(s, st)
            })

        } else if pair_type == s2sym("Opt")
               || pair_type == s2sym("Opt-")
        {
            // Optional atom; "Opt" is greedy, "Opt-" is non-greedy.
            let greedy = pair_type == s2sym("Opt");

            // `next` is needed twice: after the optional atom and in its
            // place, so it is shared through an Rc.
            let next : Rc<PatternNode> = Rc::from(next);
            let next_opt : Rc<PatternNode> = next.clone();

            let opt_pat =
                compile_atom(&pair_val,
                    Box::new(move |s: RxBuf, st: &mut SelectorState|
                        (*next)(s, st)));

            if greedy {
                // Try with the atom first, fall back to skipping it.
                Box::new(move |s: RxBuf, st: &mut SelectorState| {
                    let opt_res = (*opt_pat)(s, st);
                    if opt_res.b() {
                        opt_res
                    } else {
                        (*next_opt)(s, st)
                    }
                })
            } else {
                // Try skipping the atom first, fall back to matching it.
                Box::new(move |s: RxBuf, st: &mut SelectorState| {
                    let res = (*next_opt)(s, st);
                    if res.b() {
                        res
                    } else {
                        (*opt_pat)(s, st)
                    }
                })
            }

        } else if pair_type == s2sym("N1")
               || pair_type == s2sym("N1-")
               || pair_type == s2sym("N0")
               || pair_type == s2sym("N0-")
        {
            // Repetition of the sub atom: "N0*" allows zero repetitions,
            // "N1*" needs at least one; the "-" variants are non-greedy.
            let next : Rc<PatternNode> = Rc::from(next);
            let next_n0 : Rc<PatternNode> = next.clone();

            // The continuation of the sub atom stores the absolute offset at
            // which it was called, i.e. where one match of the sub atom
            // ended. The loop below uses it to find the start of the next
            // repetition.
            let sub_match_offs = Rc::new(RefCell::new(None));
            let sub_match_offs_n = sub_match_offs.clone();
            let sub_pat =
                compile_atom(&pair_val,
                    Box::new(move |s: RxBuf, st: &mut SelectorState| {
                        (*sub_match_offs.borrow_mut()) = Some(s.offs);
                        (*next)(s, st)
                    }));

            let n0 =
                   pair_type == s2sym("N0")
                || pair_type == s2sym("N0-");
            let greedy =
                   pair_type == s2sym("N1")
                || pair_type == s2sym("N0");

            Box::new(move |s: RxBuf, st: &mut SelectorState| {
                // Zero repetitions: just try the rest of the pattern. A
                // non-greedy repetition is done if that already matches.
                let mut res =
                    if n0 {
                        let res = (*next_n0)(s, st);
                        if !greedy && res.b() {
                            return res;
                        }
                        res
                    } else {
                        PatResult::fail()
                    };

                // `match_offs` is the relative offset at which the next
                // repetition of the sub atom is tried.
                let mut match_offs = 0;
                while match_offs <= s.s.len() {
                    (*sub_match_offs_n.borrow_mut()) = None;
                    let next_res = (*sub_pat)(s.offs(match_offs), st);

                    if next_res.b() {
                        // Sub atom plus the rest matched. Remember the
                        // result; the greedy variant keeps looking for a
                        // longer one.
                        res = next_res.len(match_offs);
                        if !greedy { break; }
                    }

                    if let Some(sub_pat_offs) = *sub_match_offs_n.borrow() {
                        // The sub atom itself matched: continue behind it,
                        // unless that makes no progress.
                        let next_offs = sub_pat_offs - s.offs;
                        if next_offs == match_offs {
                            break;
                        }
                        match_offs = next_offs;
                    } else {
                        // The sub atom did not match here anymore.
                        break;
                    }
                }

                res
            })
        } else {
            panic!("Unknown pair atom: {}", p.s());
        }

    } else if p.to_sym() == s2sym("WsChar") {
        // One whitespace character.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            if let Some(c) = s.s.chars().next() {
                let c_len = c.len_utf8();
                if c.is_whitespace() {
                    return (*next)(s.offs(c_len), st).len(c_len);
                }
            }

            PatResult::fail()
        })

    } else if p.to_sym() == s2sym("NWsChar") {
        // One non-whitespace character.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            if let Some(c) = s.s.chars().next() {
                let c_len = c.len_utf8();

                if !c.is_whitespace() {
                    return (*next)(s.offs(c_len), st).len(c_len);
                }
            }

            PatResult::fail()
        })

    } else if p.to_sym() == s2sym("Any") {
        // Exactly one arbitrary character.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            if let Some(c) = s.s.chars().next() {
                let c_len = c.len_utf8();

                (*next)(s.offs(c_len), st).len(c_len)
            } else {
                PatResult::fail()
            }
        })

    } else if p.to_sym() == s2sym("Glob") {
        // Any number of characters: try to consume the whole rest first and
        // then give back one character at a time until `next` matches.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            while_shorten_str!(s, try_len, {
                let res = (*next)(s.offs(try_len), st);
                if res.b() {
                    return res.len(try_len);
                }
            });

            PatResult::fail()
        })

    } else if p.to_sym() == s2sym("Start") {
        // Matches only at the start of the input; consumes nothing.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            if s.is_at_start() {
                (*next)(s, st)
            } else {
                PatResult::fail()
            }
        })

    } else if p.to_sym() == s2sym("End") {
        // Matches only at the end of the input; consumes nothing.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            if s.is_at_end() {
                (*next)(s, st)
            } else {
                PatResult::fail()
            }
        })

    } else if p.to_sym() == s2sym("ToLowercase") {
        // Match modifier: the rest of the pattern sees a lowercased copy of
        // the remaining input. `offs` and `orig_len` are taken over as is.
        // NOTE(review): lowercasing can change the byte length of a string,
        // in which case offsets would no longer refer to the original input
        // - confirm whether that matters for callers.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            let s_lower = s.s.to_lowercase();
            let rx = RxBuf {
                s: &s_lower[..],
                offs: s.offs,
                orig_len: s.orig_len,
            };
            (*next)(rx, st)
        })

    } else if p.to_sym() == s2sym("ToUppercase") {
        // Match modifier: like "ToLowercase", but with an uppercased copy.
        Box::new(move |s: RxBuf, st: &mut SelectorState| {
            let s_upper = s.s.to_uppercase();
            let rx = RxBuf {
                s: &s_upper[..],
                offs: s.offs,
                orig_len: s.orig_len,
            };
            (*next)(rx, st)
        })

    } else if p.is_vec() {
        if p.is_empty() {
            // An empty vector matches right away, without calling `next`.
            Box::new(move |_s: RxBuf, _st: &mut SelectorState| { PatResult::matched() })
        } else {
            panic!("UNKNOWN ATOM: {}", p.s());
        }

    } else {
        panic!("UNKNOWN ATOM: {}", p.s());
    }
}
1144
1145fn compile_pattern_branch(pat: &VVal, next: PatternNode) -> PatternNode {
1146 let mut next : Option<PatternNode> = Some(next);
1149
1150 for i in 0..pat.len() {
1151 let p = pat.at(pat.len() - (i + 1)).expect("pattern item");
1152 let my_next = next.take();
1155 next = Some(compile_atom(&p, my_next.unwrap()));
1156 }
1157
1158 next.unwrap()
1159}
1160
1161fn compile_pattern(pat: &VVal, next: PatternNode) -> PatternNode {
1162 let first = pat.at(0).unwrap_or(VVal::None);
1165 if first.is_sym() && first.to_sym() == s2sym("Alt") {
1166 let next_a : Rc<PatternNode> = Rc::from(next);
1167 let next_b : Rc<PatternNode> = next_a.clone();
1168
1169 let branch_a =
1170 compile_pattern_branch(
1171 &pat.at(1).expect("left hand side alt branch"),
1172 Box::new(move |s: RxBuf, st: &mut SelectorState|
1173 (*next_a)(s, st)));
1174
1175 let branch_b =
1176 compile_pattern(
1177 &pat.at(2).expect("right hand side alt branch"),
1178 Box::new(move |s: RxBuf, st: &mut SelectorState|
1179 (*next_b)(s, st)));
1180
1181 Box::new(move |s: RxBuf, st: &mut SelectorState| {
1182 let res = (branch_a)(s, st);
1183 if res.b() {
1184 res
1185 } else {
1186 (branch_b)(s, st)
1187 }
1188 })
1189 } else {
1190 compile_pattern_branch(pat, next)
1191 }
1192}
1193
1194fn match_pattern(pat: &PatternNode, s: &str, st: &mut SelectorState) -> bool {
1195 let old_str = st.set_str(s);
1196
1197 let rb = RxBuf::new(s);
1198 let res = (*pat)(rb, st);
1199
1200 st.restore_str(old_str);
1201
1202 res.b()
1203 && res.match_len == s.len()
1204}
1205
1206fn compile_key(k: &VVal, sn: SelNode) -> SelNode {
1207 if k.is_int() {
1208 let i = k.i();
1209
1210 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1211 if let Some(v) = v.at(i as usize) {
1212 (*sn)(&v, st, capts)
1213 } else {
1214 false
1215 }
1216 })
1217 } else {
1218 let pat = k.at(0).unwrap_or(VVal::None);
1219 let pat_type = k.at(0).unwrap_or(VVal::None).at(0).unwrap_or(VVal::None);
1220
1221 if k.len() == 1 && pat.is_pair() && pat_type.to_sym() == s2sym("I") {
1222 let key = pat.at(1).unwrap_or(VVal::None).to_sym();
1223
1224 return
1225 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1226 if let Some(v) = v.get_key_sym(&key) {
1227 (*sn)(&v, st, capts)
1228 } else {
1229 false
1230 }
1231 });
1232
1233 } else if k.len() == 1 && pat.to_sym() == s2sym("Glob") {
1234 return
1235 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1236 if !v.iter_over_vvals() { return false; }
1237
1238 let mut found = false;
1239 for (v, _) in v.iter() {
1240 if (*sn)(&v, st, capts) {
1241 found = true;
1242 }
1243 }
1244
1245 found
1246 });
1247 }
1248
1249 let pat = compile_pattern(k,
1250 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1251 PatResult::matched()
1253 }));
1254
1255 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1256 if !v.iter_over_vvals() { return false; }
1257
1258 let mut found = false;
1259
1260 for (i, (v, k)) in v.iter().enumerate() {
1261 if let Some(k) = k {
1262 k.with_s_ref(|s| {
1263 if match_pattern(&pat, s, st)
1264 && (*sn)(&v, st, capts)
1265 {
1266 found = true;
1267 }
1268 });
1269
1270 } else {
1271 let idx_str = format!("{}", i);
1272
1273 if match_pattern(&pat, &idx_str[..], st)
1274 && (*sn)(&v, st, capts) {
1275 found = true;
1276 }
1277 }
1278 }
1279
1280 found
1281 })
1282 }
1283}
1284
1285fn compile_kv(kv: &VVal) -> SelNode {
1286 let pat =
1287 compile_pattern(&kv.at(1).expect("pattern in kv"),
1288 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1289 PatResult::matched()
1291 }));
1292
1293 compile_key(
1294 &kv.at(0).expect("key in kv"),
1295 Box::new(move |v: &VVal, st: &mut SelectorState, _capts: &VVal|
1296 v.with_s_ref(|s| match_pattern(&pat, s, st))))
1297}
1298
1299fn compile_kv_match(kvm: &VVal) -> SelNode {
1300 let mut kv_conds = vec![];
1301 for i in 1..kvm.len() {
1302 kv_conds.push(compile_kv(&kvm.at(i).unwrap()));
1303 }
1304
1305 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1306 for kv in kv_conds.iter() {
1307 if !(*kv)(v, st, capts) {
1308 return false;
1309 }
1310 }
1311
1312 true
1313 })
1314}
1315
1316fn compile_node_cond(n: &VVal) -> SelNode {
1317 let node_type = n.at(0).expect("proper node condition").to_sym();
1318
1319 if node_type == s2sym("KV") {
1320 compile_kv_match(n)
1321
1322 } else if node_type == s2sym("LA") {
1323 let subsel = compile_selector(&n.at(1).expect("sub selector"), true);
1324 Box::new(move |v: &VVal, st: &mut SelectorState, _capts: &VVal| {
1325 let nocaps = VVal::None;
1326 (*subsel)(v, st, &nocaps)
1327 })
1328
1329 } else if node_type == s2sym("And") {
1330 let a = compile_node_cond(&n.at(1).expect("node condition a"));
1331 let b = compile_node_cond(&n.at(2).expect("node condition b"));
1332 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal|
1333 (*a)(v, st, capts) && (*b)(v, st, capts))
1334
1335 } else if node_type == s2sym("Or") {
1336 let a = compile_node_cond(&n.at(1).expect("node condition a"));
1337 let b = compile_node_cond(&n.at(2).expect("node condition b"));
1338 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal|
1339 (*a)(v, st, capts) || (*b)(v, st, capts))
1340
1341 } else if node_type == s2sym("Not") {
1342 let nc = compile_node_cond(&n.at(1).expect("node condition"));
1343 Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal|
1344 !(*nc)(v, st, capts))
1345
1346 } else if node_type == s2sym("Type") {
1347 let pat = compile_pattern(&n.at(1).expect("node type pattern"),
1348 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1349 PatResult::matched()
1351 }));
1352 Box::new(move |v: &VVal, st: &mut SelectorState, _capts: &VVal| {
1353 match_pattern(&pat, v.type_name(), st)
1354 })
1355
1356 } else if node_type == s2sym("Str") {
1357 let pat = compile_pattern(&n.at(1).expect("node type pattern"),
1358 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1359 PatResult::matched()
1361 }));
1362 Box::new(move |v: &VVal, st: &mut SelectorState, _capts: &VVal| {
1363 v.with_s_ref(|s| match_pattern(&pat, s, st))
1364 })
1365
1366 } else {
1367 panic!("Unsupported node cond: {}", node_type);
1368 }
1369}
1370
/// Compiles the parsed selector node `n` into a selector step that
/// continues with `sn` for every value the node selects.
///
/// Supported node types (first element of `n`):
/// - `NK`: select children by key, see `compile_key`
/// - `RecGlob`: visit the value itself and, recursively, its iterable
///   children
/// - `NCap`: run the inner node and record the values it selects as
///   selector captures while `sn` runs
///
/// If `n` has a node condition at index 2 that is not none, `sn` is only
/// called for values that fulfill it.
///
/// # Panics
///
/// Panics on unsupported node types and on missing mandatory elements.
fn compile_node(n: &VVal, sn: SelNode) -> SelNode {
    let node_type = n.at(0).expect("proper node").to_sym();

    // Wrap the continuation so that it only runs for values that pass the
    // node condition, if there is one.
    let sn =
        if let Some(node_cond) = n.at(2) {
            if node_cond.is_none() {
                sn
            } else {
                let cond = compile_node_cond(&node_cond);
                Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
                    if (*cond)(v, st, capts) {
                        (*sn)(v, st, capts)
                    } else {
                        false
                    }
                })
            }

        } else {
            sn
        };

    if node_type == s2sym("NK") {
        compile_key(
            &n.at(1).unwrap_or(VVal::None), sn)

    } else if node_type == s2sym("RecGlob") {
        let rec_cond = n.at(1).unwrap_or(VVal::None);
        let recval_cond = n.at(3).unwrap_or(VVal::None);

        // Optional condition on keys: a `NotKey` pattern excludes the
        // children whose key matches the pattern from the recursion.
        let key_cond =
            if let Some(cond) = rec_cond.at(0) {
                if cond.to_sym() == s2sym("NotKey") {
                    let pat = compile_pattern(&rec_cond.at(1).expect("pattern"),
                        Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
                            PatResult::matched()
                        }));

                    Some(Box::new(move |k: &VVal, st: &mut SelectorState| -> bool {
                        !k.with_s_ref(|s| match_pattern(&pat, s, st))
                    }))
                } else {
                    None
                }
            } else {
                None
            };

        // Optional condition on values: children that do not fulfill it
        // are excluded from the recursion.
        let val_cond =
            if recval_cond.is_some() {
                Some(compile_node_cond(&recval_cond))
            } else {
                None
            };

        Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
            if !v.iter_over_vvals() { return false; }

            let mut found = false;

            // Traversal with an explicit stack instead of recursion. `sn`
            // is called for every visited value, starting with `v` itself.
            let mut stack = vec![v.clone()];
            while let Some(v) = stack.pop() {
                if (*sn)(&v, st, capts) {
                    found = true;
                }

                for (v, k) in v.iter() {
                    // The key condition only applies to children that
                    // actually have a key.
                    if let Some(k) = k {
                        if let Some(key_cond) = &key_cond {
                            if !(*key_cond)(&k, st) {
                                continue;
                            }
                        }
                    }

                    // The value condition is evaluated with `VVal::None` in
                    // place of the captures argument.
                    if let Some(val_cond) = &val_cond {
                        if !(*val_cond)(&v, st, &VVal::None) {
                            continue;
                        }
                    }

                    // Only children that can be iterated are visited.
                    if v.iter_over_vvals() {
                        stack.push(v);
                    }
                }
            }

            found
        })

    } else if node_type == s2sym("NCap") {
        // The selected value is on the selector capture stack exactly for
        // the duration of the call to the continuation.
        compile_node(&n.at(1).expect("capture node"),
            Box::new(move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
                st.push_sel_capture(v.clone());
                let ret = (*sn)(v, st, capts);
                st.pop_sel_caputure();
                ret
            }))

    } else {
        panic!("Unsupported node type: {}", node_type);
    }
}
1475
1476fn compile_selector(sel: &VVal, no_capture: bool) -> SelNode {
1477 if let VVal::Lst(_) = sel {
1479
1480 let first = sel.at(0).unwrap_or(VVal::None);
1481 if first.to_sym() == s2sym("Path") {
1482 let mut next : Option<SelNode> = Some(Box::new(
1483 move |v: &VVal, st: &mut SelectorState, capts: &VVal| {
1484 if !no_capture {
1485 if st.has_captures() {
1486 capts.push(st.get_sel_captures());
1487 } else {
1488 capts.push(v.clone());
1489 }
1490 }
1491 true
1492 }));
1493
1494 for i in 1..sel.len() {
1495 let nod = sel.at(sel.len() - i).expect("proper path");
1496 let n = next.take().unwrap();
1497 next = Some(compile_node(&nod, n));
1498 }
1499
1500 next.unwrap()
1501 } else {
1502 Box::new(move |_v: &VVal, _st: &mut SelectorState, _capts: &VVal| {
1503 panic!("unimplemented selector type: {}", first.s());
1504 })
1505 }
1506 } else {
1507 let sel = sel.clone();
1508 Box::new(move |_v: &VVal, _st: &mut SelectorState, _capts: &VVal| {
1509 panic!("unimplemented selector type?: {}", sel.s());
1510 })
1511 }
1512}
1513
1514fn compile_find_pattern(v: &VVal) -> PatternNode {
1515 let pat = compile_pattern(v,
1516 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1517 PatResult::matched()
1519 }));
1520
1521 Box::new(move |s: RxBuf, st: &mut SelectorState| {
1522 let mut i = 0;
1523 while i <= s.s.len() {
1524 let mut res = (*pat)(s.offs(i), st);
1525 if res.b() {
1526 res.offs += i;
1527 return res;
1528 }
1529
1530 i += 1;
1531 while i <= s.s.len() && !s.s.is_char_boundary(i) {
1532 i += 1;
1533 }
1534 }
1535
1536 PatResult::fail()
1537 })
1538}
1539
1540
1541fn compile_match_pattern(v: &VVal) -> PatternNode {
1542 let pat = compile_pattern(v,
1543 Box::new(move |_s: RxBuf, _st: &mut SelectorState| {
1544 PatResult::matched()
1546 }));
1547
1548 Box::new(move |s: RxBuf, st: &mut SelectorState| {
1549 (*pat)(s, st)
1550 })
1551}
1552
1553fn check_pattern_start_anchor(pattern: &VVal) -> bool {
1554 if let Some(first) = pattern.at(0) {
1555 if first.is_sym() && first.to_sym() == s2sym("Start") {
1556 return true;
1557 } else if first.is_pair() {
1558 if let Some(pair_type) = first.at(0) {
1559 let pair_val = first.at(1).unwrap_or(VVal::None);
1560 let branch_first = pair_val.at(0).unwrap_or(VVal::None);
1561
1562 if pair_type.to_sym() == s2sym("PatSub")
1563 || pair_type.to_sym() == s2sym("PatCap")
1564 {
1565 return branch_first.is_sym()
1566 && branch_first.to_sym() == s2sym("Start");
1567 }
1568 }
1569 }
1570 }
1571
1572 false
1573}
1574
1575pub fn create_selector(sel: &str, result_ref: VVal)
1580 -> Result<Box<dyn Fn(&VVal) -> VVal>, ParseError>
1581{
1582 let selector = parse_selector(sel)?;
1583 let comp_sel = compile_selector(&selector, false);
1584
1585 Ok(Box::new(move |v: &VVal| {
1586 let mut state = SelectorState::new();
1587 let capts = VVal::vec();
1588 (*comp_sel)(v, &mut state, &capts);
1589
1590 if !capts.is_empty() {
1591 result_ref.set_ref(capts.clone());
1592 capts
1593 } else {
1594 result_ref.set_ref(VVal::None);
1595 VVal::None
1596 }
1597 }))
1598}
1599
1600
1601pub fn create_selector_function(sel: &str, result_ref: VVal)
1606 -> Result<VVal, ParseError>
1607{
1608 let rref2 = result_ref.clone();
1609 let sel_fun = create_selector(sel, result_ref)?;
1610
1611 Ok(VValFun::new_fun(
1612 move |env: &mut Env, _argc: usize| {
1613 if let Some(v) = env.arg_ref(0) {
1614 Ok(sel_fun(v))
1615 } else {
1616 rref2.set_ref(VVal::None);
1617 Ok(VVal::None)
1618 }
1619 }, Some(1), Some(1), false))
1620}
1621
1622fn parse_and_compile_regex(pat: &str) -> Result<PatternNode, ParseError> {
1623 let mut ps = State::new(pat, "<pattern>");
1624 ps.set_pattern_ident_mode();
1625 ps.skip_ws();
1626 let pattern = parse_pattern(&mut ps)?;
1627
1628 ps.skip_ws();
1629
1630 if !ps.at_end() {
1631 return Err(ps.err(
1632 ParseErrorKind::UnexpectedToken(
1633 ps.peek().unwrap(), "end of pattern")));
1634 }
1635
1636 let not_find = check_pattern_start_anchor(&pattern);
1637 let comp_pat =
1638 if not_find { compile_match_pattern(&pattern) }
1639 else { compile_find_pattern(&pattern) };
1640
1641 Ok(comp_pat)
1642}
1643
1644pub fn create_regex_find(pat: &str, result_ref: VVal)
1649 -> Result<Box<dyn Fn(&VVal) -> VVal>, ParseError>
1650{
1651 let comp_pat = parse_and_compile_regex(pat)?;
1652
1653 Ok(Box::new(move |v: &VVal| {
1654 v.with_s_ref(|s| {
1655 let mut ss = SelectorState::new();
1656 ss.set_str(s);
1657 let pat_res = (*comp_pat)(RxBuf::new(s), &mut ss);
1658 let r = pat_res.to_vval(s);
1659 result_ref.set_ref(r.clone());
1660 r
1661 })
1662 }))
1663}
1664
1665struct FindAllState<'a, 'b> {
1666 ss: SelectorState,
1667 s: &'a str,
1668 comp_pat: &'b PatternNode,
1669 cur_offs: usize,
1670}
1671
1672impl<'a, 'b> FindAllState<'a, 'b> {
1673 fn new(s: &'a str, comp_pat: &'b PatternNode) -> Self {
1674 Self {
1675 ss: SelectorState::new(),
1676 cur_offs: 0,
1677 comp_pat,
1678 s,
1679 }
1680 }
1681
1682 fn next(&mut self) -> Option<(VVal, (usize, usize))> {
1683 let rxb = RxBuf::new(&self.s[self.cur_offs..]);
1684
1685 let pat_res = (*self.comp_pat)(rxb, &mut self.ss);
1686 if let Some(pos) = pat_res.pos() {
1687 let v = pat_res.to_vval(&self.s[self.cur_offs..]);
1688 let out_pos = (self.cur_offs + pos.0, pos.1);
1689 self.cur_offs += pos.0 + pos.1;
1690 Some((v, out_pos))
1691
1692 } else {
1693 None
1694 }
1695 }
1696}
1697
1698pub type RegexFindAllFunc = Box<dyn Fn(&VVal, Box<dyn Fn(VVal, (usize, usize))>)>;
1699
1700pub fn create_regex_find_all(pat: &str, result_ref: VVal)
1705 -> Result<RegexFindAllFunc, ParseError>
1706{
1707 let comp_pat = parse_and_compile_regex(pat)?;
1708
1709 Ok(Box::new(move |v: &VVal, fun: Box<dyn Fn(VVal, (usize, usize))>| {
1710 v.with_s_ref(|s| {
1711 let mut fs = FindAllState::new(s, &comp_pat);
1712
1713 while let Some((v, pos)) = fs.next() {
1714 result_ref.set_ref(v.clone());
1715 fun(v, pos);
1716 }
1717 })
1718 }))
1719}
1720
/// Selects how a compiled pattern is applied to its input.
#[derive(PartialEq,Debug,Copy,Clone)]
pub enum RegexMode {
    /// Find the first match only.
    Find,
    /// Call a function for every match.
    FindAll,
    /// Replace every match with the result of a function.
    Substitute,
}

// `from_str` never fails, so it is kept as an inherent method instead of an
// implementation of `std::str::FromStr`.
#[allow(clippy::should_implement_trait)]
impl RegexMode {
    /// Maps a mode string to a `RegexMode`: `"g"` selects `FindAll`, `"s"`
    /// selects `Substitute` and every other string selects `Find`.
    pub fn from_str(s: &str) -> Self {
        if s == "g" {
            RegexMode::FindAll
        } else if s == "s" {
            RegexMode::Substitute
        } else {
            RegexMode::Find
        }
    }
}
1738
1739pub fn create_regex_find_function(pat: &str, result_ref: VVal, mode: RegexMode)
1744 -> Result<VVal, ParseError>
1745{
1746 let rref2 = result_ref.clone();
1747 match mode {
1748 RegexMode::FindAll => {
1749 let comp_pat = parse_and_compile_regex(pat)?;
1750 Ok(VValFun::new_fun(
1751 move |env: &mut Env, _argc: usize| {
1752 let s = env.arg(0);
1753 let fun = env.arg(1).disable_function_arity();
1754
1755 s.with_s_ref(|s| {
1756 let mut fs = FindAllState::new(s, &comp_pat);
1757 let mut ret = Ok(VVal::None);
1758
1759 while let Some((v, pos)) = fs.next() {
1760 result_ref.set_ref(v.clone());
1761 env.push(v);
1762 env.push(VVal::Int(pos.0 as i64));
1763 env.push(VVal::Int(pos.1 as i64));
1764 match fun.call_internal(env, 3) {
1765 Ok(r) => { ret = Ok(r); },
1766 Err(StackAction::Break(v)) => { ret = Ok(v.as_ref().clone()); env.popn(3); break; },
1767 Err(StackAction::Next) => { },
1768 Err(e) => { ret = Err(e); env.popn(3); break; },
1769 }
1770 env.popn(3);
1771 }
1772
1773 ret
1774 })
1775 }, Some(2), Some(2), false))
1776 },
1777 RegexMode::Substitute => {
1778 let comp_pat = parse_and_compile_regex(pat)?;
1779 Ok(VValFun::new_fun(
1780 move |env: &mut Env, _argc: usize| {
1781 let s = env.arg(0);
1782 let fun = env.arg(1).disable_function_arity();
1783
1784 let mut out = String::new();
1785 let mut last_offs = 0;
1786
1787 s.with_s_ref(|s| {
1788 let mut fs = FindAllState::new(s, &comp_pat);
1789 let mut ret = Ok(VVal::None);
1790
1791 while let Some((v, pos)) = fs.next() {
1792 if last_offs < pos.0 {
1793 out.push_str(&s[last_offs..pos.0]);
1794 last_offs = pos.0 + pos.1;
1795 }
1796
1797 result_ref.set_ref(v.clone());
1798 env.push(v);
1799 env.push(VVal::Int(pos.0 as i64));
1800 env.push(VVal::Int(pos.1 as i64));
1801 match fun.call_internal(env, 3) {
1802 Ok(r) => {
1803 r.with_s_ref(|r| {
1804 out.push_str(r);
1805 last_offs = pos.0 + pos.1;
1806 });
1807 },
1808 Err(StackAction::Break(v)) => {
1809 v.with_s_ref(|r| {
1810 out.push_str(r);
1811 last_offs = pos.0 + pos.1;
1812 });
1813 env.popn(3);
1814 break;
1815 },
1816 Err(StackAction::Next) => {
1817 },
1818 Err(e) => {
1819 ret = Err(e);
1820 env.popn(3);
1821 break;
1822 },
1823 }
1824 env.popn(3);
1825 }
1826
1827 if last_offs < s.len() {
1828 out.push_str(&s[last_offs..]);
1829 }
1830
1831 if let Ok(VVal::None) = ret {
1832 ret = Ok(VVal::new_str_mv(out));
1833 }
1834
1835 ret
1836 })
1837 }, Some(2), Some(2), false))
1838 },
1839 RegexMode::Find => {
1840 let match_fun = create_regex_find(pat, result_ref)?;
1841 Ok(VValFun::new_fun(
1842 move |env: &mut Env, _argc: usize| {
1843 if let Some(s) = env.arg_ref(0) {
1844 Ok(match_fun(s))
1845 } else {
1846 rref2.set_ref(VVal::None);
1847 Ok(VVal::None)
1848 }
1849 }, Some(1), Some(1), false))
1850 },
1851 }
1852}
1853
1854#[cfg(test)]
1855mod tests {
1856 use super::*;
1857 use regex_syntax::ast::Ast;
1858use regex_syntax::ast::LiteralKind;
1860 use regex_syntax::ast::SpecialLiteralKind;
1861 use regex_syntax::ast::RepetitionKind;
1862
1863 pub fn re_ast2wlpat(a: &Ast) -> String {
1864 match a {
1865 Ast::Empty(_) => "".to_string(),
1866 Ast::Flags(f) => {
1867 panic!("Got flags: {:?}", f);
1868 },
1869 Ast::Literal(l) => {
1870 match &l.kind {
1871 LiteralKind::Verbatim => l.c.to_string(),
1872 LiteralKind::Punctuation => l.c.to_string(),
1873 LiteralKind::Octal => l.c.to_string(),
1874 LiteralKind::HexFixed(_) => l.c.to_string(),
1875 LiteralKind::HexBrace(_) => l.c.to_string(),
1876 LiteralKind::Special(slk) => {
1877 match &slk {
1878 SpecialLiteralKind::Bell => "\\b".to_string(),
1879 SpecialLiteralKind::FormFeed => "\\f".to_string(),
1880 SpecialLiteralKind::Tab => "\\t".to_string(),
1881 SpecialLiteralKind::LineFeed => "\\n".to_string(),
1882 SpecialLiteralKind::CarriageReturn => "\\r".to_string(),
1883 SpecialLiteralKind::VerticalTab => "\\v".to_string(),
1884 SpecialLiteralKind::Space => "[ ]".to_string(),
1885 }
1886 },
1887 }
1888 },
1889 Ast::Dot(_) => "?".to_string(),
1890 Ast::Assertion(a) => {
1891 panic!("Assertion: {:?}", a);
1892 },
1893 Ast::Class(cl) => {
1894 panic!("Class: {:?}", cl);
1895 },
1896 Ast::Repetition(rep) => {
1897 match &rep.op.kind {
1898 RepetitionKind::ZeroOrOne => {
1899 if rep.greedy {
1900 "$?".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1901 } else {
1902 "$<?".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1903 }
1904 },
1905 RepetitionKind::OneOrMore => {
1906 if rep.greedy {
1907 "$+".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1908 } else {
1909 "$<+".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1910 }
1911 },
1912 RepetitionKind::ZeroOrMore => {
1913 if rep.greedy {
1914 "$*".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1915 } else {
1916 "$<*".to_string() + &re_ast2wlpat(rep.ast.as_ref())
1917 }
1918 },
1919 _ => panic!("Unimplemented rep op: {:?}", rep),
1920 }
1921 },
1922 Ast::Group(g) => {
1923 panic!("Grp: {:?}", g);
1924 },
1925 Ast::Alternation(alt) => {
1926 panic!("alt: {:?}", alt);
1927 },
1928 Ast::Concat(cat) => {
1929 cat.asts.iter()
1930 .map(|a| re_ast2wlpat(a))
1931 .collect::<Vec<String>>()
1932 .join("")
1933 },
1934 }
1935 }
1936
1937 pub fn re2wlpat(s: &str) -> String {
1938 use regex_syntax::ast::parse::Parser;
1939 let ast = Parser::new().parse(s).unwrap();
1940 println!("***\nIN: {}\n***", s);
1941 println!("***\nAST: {:?}\n***", ast);
1942 re_ast2wlpat(&ast)
1943 }
1944
1945 pub fn v(s: &str) -> VVal {
1946 let mut ctx = crate::EvalContext::new_default();
1947 ctx.eval(s).expect("correct compilation")
1948 }
1949
1950 fn rep(re: &str, st: &str) -> String {
1951 let wlre = re2wlpat(re);
1952 println!("######## WLambda Regex: [ {} ]", wlre);
1953 pat(&wlre, st)
1954 }
1955
1956 fn pat(pat: &str, st: &str) -> String {
1957 let mut ps = State::new(pat, "<pattern>");
1958 ps.set_pattern_ident_mode();
1959 ps.skip_ws();
1960 match parse_pattern(&mut ps) {
1961 Ok(v) => {
1962 let pn = compile_find_pattern(&v);
1964 let mut ss = SelectorState::new();
1965 ss.set_str(st);
1966 let rb = RxBuf::new(st);
1967 (*pn)(rb, &mut ss).to_test_string(st)
1968 },
1969 Err(e) => format!("Error: {}", e),
1970 }
1971 }
1972
1973 fn p(s: &str) -> String {
1974 match parse_selector(s) {
1975 Ok(v) => v.s(),
1976 Err(e) => format!("Error: {}", e),
1977 }
1978 }
1979
1980 fn pev(s: &str, v: &VVal) -> VVal {
1981 let sel_ast =
1982 match parse_selector(s) {
1983 Ok(v) => v,
1984 Err(e) => { return VVal::new_str_mv(format!("Error: {}", e)); },
1985 };
1986 let sn = compile_selector(&sel_ast, false);
1987 let mut state = SelectorState::new();
1988 let capts = VVal::vec();
1989 (*sn)(v, &mut state, &capts);
1990 capts.sort(|a: &VVal, b: &VVal| { a.compare_str(b) });
1991 capts
1992 }
1993
1994 fn pes(s: &str, v: &VVal) -> String { pev(s, v).s() }
1995
1996 #[test]
1997 fn check_selector_match_path() {
1998 let v1 =
1999 VVal::map3("a",
2000 VVal::vec3(
2001 VVal::Int(20),
2002 VVal::pair(VVal::Int(2), VVal::Int(4)),
2003 VVal::new_str("F0O")),
2004 "ab",
2005 VVal::vec2(
2006 VVal::Int(33),
2007 VVal::Int(44)),
2008 "xyab",
2009 VVal::vec3(
2010 VVal::Int(8),
2011 VVal::Int(9),
2012 VVal::map2("X", VVal::Int(10), "Y", VVal::Int(20))));
2013
2014 assert_eq!(pes("a", &v1), "$[$[20,$p(2,4),\"F0O\"]]");
2015 assert_eq!(pes("a/2/2", &v1), "$['O']");
2016 assert_eq!(pes("a/2/1", &v1), "$['0']");
2017 assert_eq!(pes("ab/0", &v1), "$[33]");
2018
2019 assert_eq!(pes("a/?", &v1), "$[$p(2,4),20,\"F0O\"]");
2020 assert_eq!(pes("a/?/1", &v1), "$['0',4]");
2021
2022 assert_eq!(pes("?/1", &v1), "$[$p(2,4)]");
2023 assert_eq!(pes("?/2", &v1), "$[\"F0O\"]");
2024
2025 assert_eq!(pes("?b/1", &v1), "$[44]");
2026 assert_eq!(pes("a?/1", &v1), "$[44]");
2027 assert_eq!(pes("??ab/1", &v1), "$[9]");
2028
2029 assert_eq!(pes("*/X", &v1), "$[]");
2030 assert_eq!(pes("*/?/X", &v1), "$[10]");
2031 assert_eq!(pes("*/*/X", &v1), "$[10]");
2032 assert_eq!(pes("*/2/2", &v1), "$['O']");
2033
2034 assert_eq!(pes("*ab/*/X", &v1), "$[10]");
2035
2036 assert_eq!(pes("[xy][xy]*/[01]", &v1), "$[8,9]");
2037 assert_eq!(pes("[^xy][^xy]/[01]", &v1), "$[33,44]");
2038 assert_eq!(pes("a/[^01]", &v1), "$[\"F0O\"]");
2039
2040 assert_eq!(pes("(ab)/[01]", &v1), "$[33,44]");
2041 assert_eq!(pes("(x)y(a)b/[01]", &v1), "$[8,9]");
2042 assert_eq!(pes("$!(a)*/[01]", &v1), "$[8,9]");
2043 assert_eq!(pes("a/$![01]?", &v1), "$[\"F0O\"]");
2044
2045 assert_eq!(pes("$=x*/[01]", &v1), "$[8,9]");
2046 assert_eq!(pes("$=(ab)*/[01]", &v1), "$[33,44]");
2047 assert_eq!(pes("a$=b*/[01]", &v1), "$[33,44]");
2048 assert_eq!(pes("$!x*$=b?/[01]", &v1), "$[33,44]");
2049
2050 assert_eq!(pes("$+[xy]ab/0", &v1), "$[8]");
2051 assert_eq!(pes("a$+b/0", &v1), "$[33]");
2052 assert_eq!(pes("$*[xy]ab/0", &v1), "$[33,8]");
2053 assert_eq!(pes("$?[xy][xy]ab/0", &v1), "$[8]");
2054
2055 let v2 = VVal::map1("\t", VVal::Int(12));
2056 assert_eq!(pes("\\t", &v2), "$[12]");
2057 assert_eq!(pes("[\\t]", &v2), "$[12]");
2058 }
2059
2060 #[test]
2061 fn check_selector_match_esc() {
2062 let v1 =
2063 VVal::map3("\\",
2064 VVal::vec3(
2065 VVal::Int(20),
2066 VVal::pair(VVal::Int(2), VVal::Int(4)),
2067 VVal::new_str("F0O%/{}[]")),
2068 "//",
2069 VVal::vec2(
2070 VVal::Int(33),
2071 VVal::Int(44)),
2072 "?*",
2073 VVal::vec3(
2074 VVal::Int(8),
2075 VVal::Int(9),
2076 VVal::map2("*", VVal::Int(10), "|", VVal::Int(20))));
2077
2078 assert_eq!(pes("*/*/\\*", &v1), "$[10]");
2079 assert_eq!(pes("*/*/\\|", &v1), "$[20]");
2080
2081 assert_eq!(pes("[\\\\]*/1", &v1), "$[$p(2,4)]");
2082 assert_eq!(pes("[\\/]*/0", &v1), "$[33]");
2083 assert_eq!(pes("\\/\\//0", &v1), "$[33]");
2084 }
2085
2086 #[test]
2087 fn check_selector_kv_match() {
2088 let v1 = v(r#"
2089 $[
2090 ${ a = :test, x = 10 , childs = $[10, 20], },
2091 ${ b = :test, x = 11 , childs = $[11, 21, 31], },
2092 ${ a = :test, x = 12 , childs = $[12, 22, 32, 42], },
2093 ${ c = :test, y = 15, x = 22, childs = $[13, 23, 33, 43, 53], },
2094 ${ a = :test, y = 16, x = 23, childs = $[14, 24, 34, 44, 54, 64], },
2095 ]
2096 "#);
2097
2098 assert_eq!(pes("*:{a = test, x = 1* }/childs/1", &v1), "$[20,22]");
2099
2100 assert_eq!(pes("*/[xy]", &v1), "$[10,11,12,15,16,22,23]");
2101 assert_eq!(pes("*/:{a = test, x = 1* }", &v1), "$[]");
2102 assert_eq!(pes("*:{a = test, x = 1* }", &v1), "$[${a=:test,childs=$[10,20],x=10},${a=:test,childs=$[12,22,32,42],x=12}]");
2103 assert_eq!(pes("*:{a = test, x = 2* }/y", &v1), "$[16]");
2104 assert_eq!(pes("*:{[ab] = test }/childs/*", &v1), "$[10,11,12,14,20,21,22,24,31,32,34,42,44,54,64]");
2105 assert_eq!(pes("*:{a = test}/[xy]", &v1), "$[10,12,16,23]");
2106
2107 assert_eq!(pes("*:!{a = test}/[xy]", &v1), "$[11,15,22]");
2108 assert_eq!(pes("*:!{ * = * }/[xy]", &v1), "$[]");
2109 assert_eq!(pes("*:!{ x = 1* }/[xy]", &v1), "$[15,16,22,23]");
2110 assert_eq!(pes("*:!{[bc] = test} & :{ y = *6 }/childs/1", &v1), "$[24]");
2111 assert_eq!(pes(r"
2112 *:!{[bc] = test}
2113 & :{ y = *6 }
2114 | :{ x = *[12] }
2115 /childs/1", &v1), "$[22,24]");
2116 assert_eq!(pes("*:!{a = test} & :!{ c = test }/childs/1", &v1), "$[21]");
2117 assert_eq!(pes("*:{a = test} | :{ c = test }/childs/1", &v1), "$[20,22,23,24]");
2118 }
2119
2120 #[test]
2121 fn check_selector_rec_glob() {
2122 let v1 = v(r#"
2123 !i = $[10, 20, 30];
2124 !j = $[40, 50];
2125 !k = $[90, 80];
2126 !d = ${ h = 10, w = 20, childs = $[ i, j ] };
2127 !e = ${ h = 20, w = 24, childs = $[ j, k ] };
2128 !f = ${ h = 12, w = 30, childs = $[ i, k ] };
2129 $[
2130 ${ a = :test, x = 10 , childs = $[d, e], },
2131 ${ b = :test, x = 11 , childs = $[d, e, f], },
2132 ${ a = :test, x = 12 , childs = $[f], },
2133 ${ c = :test, y = 15, x = 22, childs = $[e], },
2134 ${ a = :test, y = 16, x = 23, childs = $[f, e], },
2135 ]
2136 "#);
2137
2138 assert_eq!(
2139 pes("*/childs/**/*:type = integer & :str=[89]*", &v1),
2140 "$[80,80,80,80,80,80,80,90,90,90,90,90,90,90]");
2141 assert_eq!(pes("**/childs/*:{ w = 2* }/h", &v1), "$[10,10,20,20,20,20]");
2142 assert_eq!(pes("**/childs/*:type=vector/*:type = integer", &v1),
2143 "$[10,10,10,10,10,20,20,20,20,20,30,30,30,30,30,40,40,40,40,40,40,50,50,50,50,50,50,80,80,80,80,80,80,80,90,90,90,90,90,90,90]");
2144
2145 assert_eq!(
2147 pes("*/childs/^**/^*:type = integer & :str=[89]*", &v1),
2148 "$[$[$<1=>$[90,80],80],$[$<1>,80],$[$<1>,80],$[$<1>,80],$[$<1>,80],$[$<1>,80],$[$<1>,80],$[$<1>,90],$[$<1>,90],$[$<1>,90],$[$<1>,90],$[$<1>,90],$[$<1>,90],$[$<1>,90]]");
2149 assert_eq!(
2150 pes("*/0/[x]", &pev("**/^*:{a=*}|:{b=*}/childs/*:{h=12}/^h", &v1)),
2151 "$[11,12,23]");
2152
2153 assert_eq!(pes("**/*:str=1[^0]", &v1), "$[11,12,12,12,12,15,16]");
2154 }
2155
2156 #[test]
2157 fn check_selector_la() {
2158 let v1 = v(r#"
2159 !i = $[10, 20, 30];
2160 !j = $[40, 50];
2161 !k = $[90, 80];
2162 !d = ${ h = 10, w = 20, childs = $[ i, j ] };
2163 !e = ${ h = 20, w = 24, childs = $[ j, k ] };
2164 !f = ${ h = 12, w = 30, childs = $[ i, k ] };
2165 $[
2166 ${ a = :test, x = 10 , childs = $[d, e], },
2167 ${ b = :test, x = 11 , childs = $[d, e, f], },
2168 ${ a = :test, x = 12 , childs = $[f], },
2169 ${ c = :test, y = 15, x = 22, childs = $[e], },
2170 ${ a = :test, y = 16, x = 23, childs = $[f, e], },
2171 ]
2172 "#);
2173
2174 assert_eq!(pes("*:(childs/*:{h=12})/x", &v1), "$[11,12,23]");
2175 }
2176
2177 #[test]
2178 fn check_selector_rec_cond() {
2179 let v1 = v(r#"
2180 $[
2181 ${ a = :test, x = 10 , childs = $[${ i = 90 }], },
2182 ${ b = :test, x = 11 , childs = $[${ i = 91 }], },
2183 ${ a = :test, x = 12 , childs = $[${ i = 92 }], },
2184 ${ c = :test, y = 15, x = 22, childs = $[${ i = 93 }], },
2185 ${ a = :test, y = 16, x = 23, childs = $[
2186 ${ a = :test, x = 13, childs = $[${ i = 94 }] },
2187 ] }
2188 ]
2189 "#);
2190
2191 assert_eq!(pes("**:{ x = 1* }/childs/*/i", &v1), "$[90,91,92,94]");
2192 assert_eq!(pes("** ! key = childs/childs/*/i", &v1), "$[90,91,92,93]");
2193 assert_eq!(pes("** ! key = childs :{ x = 1* }/childs/*/i", &v1), "$[90,91,92]");
2194
2195 assert_eq!(pes("** =:{ a = test } /x", &v1), "$[10,12,23]");
2196 assert_eq!(pes("** =:{ a = test } | :type=vector /x", &v1), "$[10,12,13,23]");
2197 assert_eq!(pes("** =:{ a = test } | :type=vector :{y=16}/x|y|a", &v1), "$[16,23,:test]");
2198 assert_eq!(pes("** !key=childs =:{ a = test } | :type=vector /x", &v1), "$[10,12,23]");
2199 }
2200
2201 #[test]
2202 fn check_selector_node_path() {
2203 assert_eq!(p("a"), "$[:Path,$[:NK,$[$p(:I,:a)]]]");
2204 assert_eq!(p("a/0/2"), "$[:Path,$[:NK,$[$p(:I,:a)]],$[:NK,0],$[:NK,2]]");
2205 assert_eq!(p("a/b/c"), "$[:Path,$[:NK,$[$p(:I,:a)]],$[:NK,$[$p(:I,:b)]],$[:NK,$[$p(:I,:c)]]]");
2206
2207 assert_eq!(p("a/^b/c/^"), "Error: <selector>:1:9 EOF while parsing: Unexpected EOF\nat code:\n1 | \n");
2208 assert_eq!(p("a/^b/c/^*"), "$[:Path,$[:NK,$[$p(:I,:a)]],$[:NCap,$[:NK,$[$p(:I,:b)]]],$[:NK,$[$p(:I,:c)]],$[:NCap,$[:NK,$[:Glob]]]]");
2209 assert_eq!(p("a/^b/^c"), "$[:Path,$[:NK,$[$p(:I,:a)]],$[:NCap,$[:NK,$[$p(:I,:b)]]],$[:NCap,$[:NK,$[$p(:I,:c)]]]]");
2210 }
2211
2212 #[test]
2213 fn check_selector_globs() {
2214 assert_eq!(p("*"), "$[:Path,$[:NK,$[:Glob]]]");
2215 assert_eq!(p("**"), "$[:Path,$[:RecGlob,$n,$n,$n]]");
2216 assert_eq!(p("^**"), "$[:Path,$[:NCap,$[:RecGlob,$n,$n,$n]]]");
2217 assert_eq!(p("^*"), "$[:Path,$[:NCap,$[:NK,$[:Glob]]]]");
2218
2219 assert_eq!(p("(a)"), "$[:Path,$[:NK,$[$p(:PatSub,$[$p(:I,:a)])]]]");
2220 assert_eq!(p("(^a)"), "$[:Path,$[:NK,$[$p(:PatCap,$[$p(:I,:a)])]]]");
2221 assert_eq!(p("^(^a)"), "$[:Path,$[:NCap,$[:NK,$[$p(:PatCap,$[$p(:I,:a)])]]]]");
2222
2223 assert_eq!(p("(*|a?)"), "$[:Path,$[:NK,$[$p(:PatSub,$[:Alt,$[:Glob],$[$p(:I,:a),:Any]])]]]");
2224
2225 assert_eq!(p("*/*/a"), "$[:Path,$[:NK,$[:Glob]],$[:NK,$[:Glob]],$[:NK,$[$p(:I,:a)]]]");
2226 assert_eq!(p("* / * / a "), "$[:Path,$[:NK,$[:Glob]],$[:NK,$[:Glob]],$[:NK,$[$p(:I,:a)]]]");
2227 assert_eq!(p("**/^a/**"), "$[:Path,$[:RecGlob,$n,$n,$n],$[:NCap,$[:NK,$[$p(:I,:a)]]],$[:RecGlob,$n,$n,$n]]");
2228
2229 assert_eq!(p("?a"), "$[:Path,$[:NK,$[:Any,$p(:I,:a)]]]");
2230 }
2231
2232 #[test]
2233 fn check_selector_kvmatch_parse() {
2234 assert_eq!(p("*:{b=a}"), "$[:Path,$[:NK,$[:Glob],$[:KV,$[$[$p(:I,:b)],$[$p(:I,:a)]]]]]");
2235 assert_eq!(p(":{b=a,a=20}"), "$[:Path,$[:NK,$[],$[:KV,$[$[$p(:I,:b)],$[$p(:I,:a)]],$[$[$p(:I,:a)],$[$p(:I,:20)]]]]]");
2236 assert_eq!(p("a : { a = 20 }"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:KV,$[$[$p(:I,:a)],$[$p(:I,:20)]]]]]");
2237 assert_eq!(p("a :!{ a = 20 }"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:Not,$[:KV,$[$[$p(:I,:a)],$[$p(:I,:20)]]]]]]");
2238 assert_eq!(p("a : { a = 20, b=a(a?)[^ABC]cc*f}"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:KV,$[$[$p(:I,:a)],$[$p(:I,:20)]],$[$[$p(:I,:b)],$[$p(:I,:a),$p(:PatSub,$[$p(:I,:a),:Any]),$p(:NCCls,\"ABC\"),$p(:I,:cc),:Glob,$p(:I,:f)]]]]]");
2239 assert_eq!(p("a : { a = 20 } | :{ b = 20 } & :{ x = 10}"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:Or,$[:KV,$[$[$p(:I,:a)],$[$p(:I,:20)]]],$[:And,$[:KV,$[$[$p(:I,:b)],$[$p(:I,:20)]]],$[:KV,$[$[$p(:I,:x)],$[$p(:I,:10)]]]]]]]");
2240 assert_eq!(p("a : (b/*/c)"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:LA,$[:Path,$[:NK,$[$p(:I,:b)]],$[:NK,$[:Glob]],$[:NK,$[$p(:I,:c)]]]]]]");
2241 assert_eq!(p("a :!(b/*/c)"), "$[:Path,$[:NK,$[$p(:I,:a)],$[:Not,$[:LA,$[:Path,$[:NK,$[$p(:I,:b)]],$[:NK,$[:Glob]],$[:NK,$[$p(:I,:c)]]]]]]]");
2242 }
2243
2244 #[test]
2245 fn check_selector_subpat() {
2246 assert_eq!(p("(^abc$$)"), "$[:Path,$[:NK,$[$p(:PatCap,$[$p(:I,:abc),:End])]]]");
2247 assert_eq!(p("(\\^abc$$)"), "$[:Path,$[:NK,$[$p(:PatSub,$[$p(:I,:\"^abc\"),:End])]]]");
2248
2249 assert_eq!(p("(abc)"), "$[:Path,$[:NK,$[$p(:PatSub,$[$p(:I,:abc)])]]]");
2250 assert_eq!(p("$!abc"), "$[:Path,$[:NK,$[$p(:ZwNegLA,$p(:I,:a)),$p(:I,:bc)]]]");
2251 assert_eq!(p("$!(abc)"), "$[:Path,$[:NK,$[$p(:ZwNegLA,$p(:PatSub,$[$p(:I,:abc)]))]]]");
2252 assert_eq!(p("$*(abc)"), "$[:Path,$[:NK,$[$p(:N0,$p(:PatSub,$[$p(:I,:abc)]))]]]");
2253 assert_eq!(p("$+(abc)"), "$[:Path,$[:NK,$[$p(:N1,$p(:PatSub,$[$p(:I,:abc)]))]]]");
2254 assert_eq!(p("$?(abc)"), "$[:Path,$[:NK,$[$p(:Opt,$p(:PatSub,$[$p(:I,:abc)]))]]]");
2255 assert_eq!(p("$=(abc)"), "$[:Path,$[:NK,$[$p(:ZwLA,$p(:PatSub,$[$p(:I,:abc)]))]]]");
2256 assert_eq!(p("$^abc$$"), "$[:Path,$[:NK,$[:Start,$p(:I,:abc),:End]]]");
2257 assert_eq!(p("(\\$abc)"), "$[:Path,$[:NK,$[$p(:PatSub,$[$p(:I,:\"$abc\")])]]]");
2258 }
2259
2260 #[test]
2261 fn check_patterns() {
2262 assert_eq!(pat("A", "XXAYY"), "A");
2263 assert_eq!(pat("AY", "XXAYY"), "AY");
2264
2265 assert_eq!(pat("A($<+B)", "ABB"), "AB");
2266 assert_eq!(pat("A($<+B)", "AB"), "AB");
2267 assert_eq!(pat("A($<+B)", "ABBB"), "AB");
2268
2269 assert_eq!(pat("A($<+B)$$", "ABB"), "ABB");
2270
2271 assert_eq!(pat("$^A($<+B)$$", "ABB"), "ABB");
2272 assert_eq!(pat("$^A($<+B)$$", "ABBB"), "ABBB");
2273 assert_eq!(pat("$^$+A($<+B)$$", "ABBB"), "ABBB");
2274 assert_eq!(pat("$^$<+A($<+B)$$", "ABBB"), "ABBB");
2275 assert_eq!(pat("$^$<+A($<+B)$$", "ABBB"), "ABBB");
2276 assert_eq!(pat("$^$<+A($<+B)C$$", "ABBBC"), "ABBBC");
2277 assert_eq!(pat("$^A($<+B)$$", "AB"), "AB");
2278 assert_eq!(pat("$^A($<+B)", "ABB"), "AB");
2279 assert_eq!(pat("$^A($<+B)", "ABBB"), "AB");
2280 assert_eq!(pat("$^$+A($<+B)", "AABBB"), "AAB");
2281 assert_eq!(pat("$^$<+A($<+B)", "AABBB"), "AAB");
2282 assert_eq!(pat("$^$<+A($<+B)", "AABBB"), "AAB");
2283 assert_eq!(pat("$^$<+A($<+B)C", "AABBBC"), "AABBBC");
2284 assert_eq!(pat("$^A($<+B)", "AB"), "AB");
2285
2286 assert_eq!(pat("$^ABC$$", "ABC"), "ABC");
2287 assert_eq!(pat("$^AB$$C", "ABC"), "-nomatch-");
2288 assert_eq!(pat("A$^ABC$$", "ABC"), "-nomatch-");
2289
2290 assert_eq!(pat("$^A($+BB)C$$", "ABBBC"), "ABBBC");
2291 assert_eq!(pat("$^A($+(B)B)C$$", "ABBBC"), "ABBBC");
2292 assert_eq!(pat("$^A($+($+B)B)C$$", "ABBBC"), "ABBBC");
2293 assert_eq!(pat("$^A($+(($+B)B)B)C$$", "ABBBC"), "ABBBC");
2294 assert_eq!(pat("$^A($+(($+B))B)C$$", "ABBBC"), "ABBBC");
2295 assert_eq!(pat("$^A($+(($+B)B))C$$", "ABBBC"), "ABBBC");
2296 assert_eq!(pat("$^$+A($+($+B)B)C$$", "ABBBC"), "ABBBC");
2297 assert_eq!(pat("$^$+A$+(B)$+BC$$", "ABBBC"), "ABBBC");
2298 assert_eq!(pat("$^$+A((B)$+B)$$", "ABB"), "ABB");
2299 assert_eq!(pat("$^$+BC$$", "BC"), "BC");
2300
2301 assert_eq!(pat("$^$+C$$", "C"), "C");
2302 assert_eq!(pat("$^ABBB$+C$$", "ABBBC"), "ABBBC");
2303 assert_eq!(pat("$^$+A($+($+B)$+B)$+C$$", "ABBBC"), "ABBBC");
2304
2305 assert_eq!(pat("$^($<+B)C$$", "BC"), "BC");
2306 assert_eq!(pat("$^$<+A($<+B)C$$", "ABC"), "ABC");
2307 assert_eq!(pat("$^$<+A((B)$<+B)C$$", "ABBC"), "ABBC");
2308 assert_eq!(pat("$^$<+BB$$", "BB"), "BB");
2309 assert_eq!(pat("$<+BB$$", "BB"), "BB");
2310 assert_eq!(pat("$+BB$$", "BB"), "BB");
2311 assert_eq!(pat("$^$<+BB$$", "BB"), "BB");
2312 assert_eq!(pat("$^$<+B$$", "B"), "B");
2313 assert_eq!(pat("$^$<+BB$$", "BBB"), "BBB");
2314 assert_eq!(pat("$^A$<+BB$$", "ABBB"), "ABBB");
2315 assert_eq!(pat("$^A$<+BBC$$", "ABBBC"), "ABBBC");
2316 assert_eq!(pat("$^A($<+B$<+B)C$$", "ABBBC"), "ABBBC");
2317 assert_eq!(pat("$^$<+A($<+B$<+B)C$$", "ABBBC"), "ABBBC");
2318 assert_eq!(pat("$^$<+A($<+(B)$<+B)C$$", "ABBBC"), "ABBBC");
2319 assert_eq!(pat("$^$<+A($<+($<+B)$<+B)C$$", "ABBBC"), "ABBBC");
2320 assert_eq!(pat("$^$<+A($<+($<+B)$<+B)C$$", "ABBBC"), "ABBBC");
2321
2322 assert_eq!(pat("$^$<+C$$", "C"), "C");
2323 assert_eq!(pat("$^ABBB$<+C$$", "ABBBC"), "ABBBC");
2324 assert_eq!(pat("$^$<+A($<+($<+B)$<+B)$<+C$$", "ABBBC"), "ABBBC");
2325
2326 assert_eq!(pat("$^A($*BB)C$$", "ABBBC"), "ABBBC");
2327 assert_eq!(pat("$^A(^B)C$$", "ABC"), "ABC-B");
2328 assert_eq!(pat("$^A(^$*B)C$$", "ABBBC"), "ABBBC-BBB");
2329 assert_eq!(pat("BC", "ABC"), "BC");
2330 assert_eq!(pat("(BC)", "ABC"), "BC");
2331 assert_eq!(pat("(^BC)", "ABC"), "BC-BC");
2332 assert_eq!(pat("$^[ ]$$", " "), " ");
2333 assert_eq!(pat("$^$*[ ]$$", " "), " ");
2334
2335 assert_eq!(pat("$^ $!x*$=b? $$", "ab"), "ab");
2336 assert_eq!(pat("$^ $!x*$=b? $$", "xyab"), "-nomatch-");
2337
2338 assert_eq!(pat("$<?a", "a"), "");
2339 assert_eq!(pat("$?a", "a"), "a");
2340 assert_eq!(pat("$^ (^$<?a)(^$+a) $$", "aaa"), "aaa--aaa");
2341 assert_eq!(pat("$^ (^$?a)(^$+a) $$", "aaa"), "aaa-a-aa");
2342
2343 assert_eq!(pat("$+($?abab)", "abbbxababb"), "abab");
2344 assert_eq!(pat("$*($?abab)", "abbbxababb"), "");
2345 assert_eq!(pat("$+(x$?abab)", "abbbxababb"), "xabab");
2346 assert_eq!(pat("$+(x$?abab)", "abbbxababxababb"), "xababxabab");
2347 assert_eq!(pat("$<+(x$?abab)", "abbbxababxababb"), "xabab");
2348 assert_eq!(pat("bbb$*(x$?abab)", "abbbxababxababb"), "bbbxababxabab");
2349 assert_eq!(pat("bbb$<*(x$?abab)x", "abbbxababxababb"), "bbbx");
2350 assert_eq!(pat("bbb$<*?ba", "abbbxababxababb"), "bbbxaba");
2351 assert_eq!(pat("bbb$*?ba", "abbbxababxababb"), "bbbxababxaba");
2352 assert_eq!(pat("bbb$<*(x$?abab)", "abbbxababxababb"), "bbb");
2353 assert_eq!(pat("$*(a$?b)", "abbbababb"), "ab");
2354 assert_eq!(pat("$*($?ab)", "abbbababb"), "abbbababb");
2355 assert_eq!(pat("$<*($?ab)", "abbbababb"), "");
2356
2357 assert_eq!(pat("[\\t\\0\\u{0101}]", "\0"), "\u{0}");
2358 assert_eq!(pat("[\\t\\0\\u{0101}]", "\t"), "\t");
2359 assert_eq!(pat("[\\t\\0\\u{0101}]", "ā"), "ā");
2360
2361 assert_eq!(pat("a$?[xy]a", "aa"), "aa");
2362 assert_eq!(pat("$^$?[xy]$$", "a"), "-nomatch-");
2363 assert_eq!(pat("$?[xy]", "x"), "x");
2364 assert_eq!(pat("$?[xy]a", "xa"), "xa");
2365 assert_eq!(pat("$?[xy][xy]abc$$", "xyabc"), "xyabc");
2366 assert_eq!(pat("$?[xy][xy]ab", "xyab"), "xyab");
2367 assert_eq!(pat("$?[xy][xy]ab$$", "xyab"), "xyab");
2368
2369 assert_eq!(pat("xyab|ab", "xyab"), "xyab");
2370 assert_eq!(pat("xyab|ab", "jjab"), "ab");
2371 assert_eq!(pat("(x|y|z)(y|x)(ab|)", "xyab"), "xyab");
2372 assert_eq!(pat("$+(x|y|z)(y|x)(ab|)", "zxyab"), "zxyab");
2373 assert_eq!(pat("$^ $*(x|y|z)(ab|) $$", "zxyab"), "zxyab");
2374
2375 assert_eq!(pat("$^ $+$s $$", " \t\n\r "), " \t\n\r ");
2376 assert_eq!(pat(" $+ $S ", " \t\nXXJFJF\r "), "XXJFJF");
2377
2378 assert_eq!(pat("AB $&L $+b $&U C", " ABbbBbc "), "ABbbBbc");
2379 assert_eq!(pat("$&U A$+BC", " abbbbbc "), "abbbbbc");
2380 assert_eq!(pat("$&L a$+bc", " ABBBBBC "), "ABBBBBC");
2381
2382 assert_eq!(pat("$+[a-z]", "ABabzXXZ"), "abz");
2383 assert_eq!(pat("$+[^a-z]", "ABabzXXZ"), "AB");
2384 assert_eq!(pat("$+[-z]", "ABab-z-XXZ"), "-z-");
2385 assert_eq!(pat("$+[z-]", "ABab-z-XXZ"), "-z-");
2386
2387 assert_eq!(pat("$+\\f", "ABfffFO"), "fff");
2388 assert_eq!(pat("$+\\x41", "BAAAO"), "AAA");
2389 assert_eq!(pat("$+ \\$", "ABx$$$xxFO"), "$$$");
2390 assert_eq!(pat("x$* \\$", "ABx$$$xxFO"), "x$$$");
2391 assert_eq!(pat("\\u{2211}", "∑"), "∑");
2392 }
2393
/// Pattern identifier mode accepts punctuation as literal match characters.
///
/// The pattern uses `!`, `/`, an escaped backslash, `^`, `,`, `'`, `:`, `;`,
/// `{`, `}` and `=` as plain identifier characters. `is_ident_selector_char`
/// rejects all of these, while `is_ident_pattern_char` only rejects
/// `? | [ ] ( ) $ *`, so this only parses when the pattern rule set is used.
#[test]
fn check_pattern_ident_mode() {
    let pattern = "($*!/$*\\\\^$*,':;{}=)";
    let subject = "!!!/\\\\\\^':;{}=";

    // The whole subject is expected to come back as the match.
    assert_eq!(pat(pattern, subject), subject);
}
2398
/// Alternation/repetition patterns plus the regex front end.
///
/// The first group runs native patterns through `pat`; the second group
/// checks `re2wlpat` (regex to pattern text) and `rep` (matching with a
/// regex). All three helpers are defined elsewhere in this test module.
#[test]
fn check_patterns_py_tests() {
    // (pattern, input, expected result of `pat`)
    const PATTERN_CASES: &[(&str, &str, &str)] = &[
        ("(ab|cd)e", "cde", "cde"),
        ("$*($+a|b)", "ab", "ab"),
        ("$+($+a|b)", "ab", "ab"),
        ("$?($+a|b)", "ab", "a"),
    ];

    for &(pattern, input, expected) in PATTERN_CASES {
        assert_eq!(
            pat(pattern, input),
            expected,
            "pattern {:?} applied to {:?}",
            pattern,
            input
        );
    }

    // A regex `.` is expected to come out as the pattern wildcard `?`.
    let translated = re2wlpat("a.b");
    assert_eq!(translated, "a?b");

    let single_char = rep("a.b", "acb");
    assert_eq!(single_char, "acb");

    // `.*` is expected to match across the embedded newline.
    let across_newline = rep("a.*b", "acc\nccb");
    assert_eq!(across_newline, "acc\nccb");
}
2410
/// Exercises `(^...)` capture groups in patterns.
///
/// NOTE(review): `pat` is defined elsewhere in this module; judging from the
/// expectations below it appears to return the overall match followed by the
/// captured texts, joined with `-` — confirm against its definition.
#[test]
fn check_pattern_capture() {
    // (pattern, input, expected result of `pat`)
    const CAPTURE_CASES: &[(&str, &str, &str)] = &[
        // Single capture inside an alternation.
        ("(^ab|cd)e", "cde", "cde-cd"),
        // Nested and repeated captures.
        ("(^(^AA)C)$$", "AAC", "AAC-AAC-AA"),
        ("$<+(^aa)$*(^a)$+(^AA)", "aaaaAAAA", "aaaaAAAA-aa-a-AA"),
        ("$+(^aa|bb)$+(^A(A)|B(B)|X(X))", "aabbAABBBXX", "aabbAABB-bb-BB"),
        ("$+(^AA|BB|XX)", "AABBBXX", "AABB-BB"),
        // Repetition outside vs. inside the capture group.
        ("$+(^A|B|X)", "AABBBXX", "AABBBXX-X"),
        ("(^$+A|$+B|$+X)", "AABBBXX", "AA-AA"),
        ("(^$+A)(^$+B)$+(^X)$$", "AABBBXX", "AABBBXX-AA-BBB-X"),
        ("(^$+A)(^$?L)(^$+B)$+(^X)$$", "AABBBXX", "AABBBXX-AA--BBB-X"),
        // Captures spread over two alternative branches.
        (
            "(^$<+(^aa)$*(^a)$+(^AA) | $<+(^aa)$*(^a)$+(^AA)C)$$",
            "aaaaAAAAC",
            "aaaaAAAAC-aaaaAAAAC-aa-a-AA",
        ),
        // `$*` vs. `$<*` in front of a capture changes how much it receives.
        ("(^a)$*?(^$+b)$*?c", "afoefoeeoobbbbfec", "afoefoeeoobbbbfec-a-b"),
        ("(^a)$<*?(^$+b)$*?c", "afoefoeeoobbbbfec", "afoefoeeoobbbbfec-a-bbbb"),
        ("(^a)$*[^b](^$+b)$*?c", "afoefoeeoobbbbfec", "afoefoeeoobbbbfec-a-bbbb"),
    ];

    for &(pattern, input, expected) in CAPTURE_CASES {
        assert_eq!(
            pat(pattern, input),
            expected,
            "pattern {:?} applied to {:?}",
            pattern,
            input
        );
    }
}
2431}