1use serde::{Deserialize, Deserializer, Serialize, Serializer};
2use smol_str::SmolStr;
3
/// Formats a pattern error with a caret pointing at the offending character.
///
/// The result has three lines: the message, the pattern wrapped in `{}`, and a
/// pointer line whose `^` sits under the character that starts at byte offset
/// `pos` of `raw`.
///
/// * `raw` - the pattern text as written by the user (without the braces).
/// * `pos` - byte offset into `raw` of the offending character.
/// * `msg` - human-readable description of the problem.
fn fmt_err(raw: &str, pos: usize, msg: &str) -> String {
    // `pos` is a byte offset, but the caret is aligned by characters: count the
    // characters in front of it so multi-byte text does not push the caret too
    // far right. If `pos` is out of range or not on a char boundary, fall back
    // to the raw byte offset.
    let column = raw.get(..pos).map_or(pos, |prefix| prefix.chars().count());
    // +1 skips the `{` that precedes the pattern on the display line.
    let pad = " ".repeat(column + 1);
    // Both detail lines share a two-space indent; `test_err_visual_pointer_position`
    // expects the caret of `*a*` at column 5, which requires exactly that indent.
    format!("sep pattern error: {msg}\n  {{{raw}}}\n  {pad}^")
}
24
/// Formats a pattern error that has no meaningful position inside the pattern.
///
/// The pattern is echoed back wrapped in `{}` after the message, on one line.
fn fmt_err_no_pos(raw: &str, msg: &str) -> String {
    format!("sep pattern error: {msg} in {{{raw}}}")
}
29
/// Byte lengths reported for one separator match.
///
/// For literal patterns both fields equal the literal's length. For glob
/// patterns with a trailing `(...)` preserve group the two differ: the group
/// must match but is not part of `consumed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SepMatch {
    /// Bytes matched by the main pattern segments, excluding the preserve group.
    pub consumed: usize,
    /// Bytes matched by the main segments plus the preserve group, if any.
    pub matched: usize,
}
40
/// One element of a parsed glob pattern.
#[derive(Debug, Clone, PartialEq)]
pub enum GlobSegment {
    /// Exact text that must appear verbatim.
    Literal(SmolStr),
    /// `*`: any run of characters, possibly empty; the shortest run is tried first.
    Star,
    /// `?`: exactly one character.
    Any,
    /// `\s`: one or more of space, tab, carriage return, or line feed.
    Whitespace,
    /// `\S`: one or more characters that are none of space, tab, CR, or LF.
    NonWhitespace,
    /// `\h`: one or more spaces or tabs.
    HorizontalWhitespace,
    /// `\H`: one or more characters that are neither space nor tab.
    NonHorizontalWhitespace,
}
59
/// A compiled pattern that needs segment-by-segment matching.
#[derive(Debug, Clone, PartialEq)]
pub struct GlobPattern {
    /// Segments of the main part of the pattern, in order.
    pub segments: Vec<GlobSegment>,
    /// Segments of the trailing `(...)` group, if the pattern has one. They must
    /// match right after the main part but are not counted as consumed.
    pub preserve: Option<Vec<GlobSegment>>,
}
66
/// The compiled form of a separator pattern.
#[derive(Debug, Clone, PartialEq)]
pub enum SepMatcher {
    /// Pattern made only of literal text with no preserve group; matched by
    /// plain substring search.
    Literal(SmolStr),
    /// Pattern containing wildcards, character classes, or a preserve group.
    Glob(GlobPattern),
}
75
/// A separator pattern together with the text it was compiled from.
#[derive(Debug, Clone, PartialEq)]
pub struct SepPattern {
    /// The pattern text exactly as passed to `build_pattern`; this is what gets serialized.
    pub(crate) raw: SmolStr,
    /// The matcher produced from `raw`.
    pub(crate) compiled: SepMatcher,
}
82
83pub fn build_pattern(raw: &str) -> Result<SepPattern, String> {
87 if raw.is_empty() {
88 return Err("sep pattern error: pattern is empty, expected content inside {}".to_string());
89 }
90
91 let (main_raw, preserve_raw) = split_preserve(raw)?;
93
94 let main_offset = 0;
96 let (segments, star_count) = parse_segments(raw, main_raw, main_offset)?;
97
98 let preserve = if let Some(pr) = preserve_raw {
100 let preserve_offset = main_raw.len() + 1; let (psegs, _) = parse_segments(raw, pr, preserve_offset)?;
102 Some(psegs)
103 } else {
104 None
105 };
106
107 if star_count > 1 {
109 let second_star_pos = find_nth_unescaped(raw, b'*', 2).unwrap_or(raw.len() - 1);
111 return Err(fmt_err(raw, second_star_pos, "at most one * allowed"));
112 }
113
114 if segments.is_empty() && preserve.as_ref().is_none_or(|p| p.is_empty()) {
116 return Err(fmt_err_no_pos(
117 raw,
118 "pattern resolves to empty after parsing",
119 ));
120 }
121
122 let has_wildcard = segments.iter().any(|s| {
124 matches!(
125 s,
126 GlobSegment::Star
127 | GlobSegment::Any
128 | GlobSegment::Whitespace
129 | GlobSegment::NonWhitespace
130 | GlobSegment::HorizontalWhitespace
131 | GlobSegment::NonHorizontalWhitespace
132 )
133 });
134 let compiled = if !has_wildcard && preserve.is_none() {
135 let lit: String = segments
137 .iter()
138 .map(|s| match s {
139 GlobSegment::Literal(l) => l.as_str(),
140 _ => unreachable!(),
141 })
142 .collect();
143 SepMatcher::Literal(SmolStr::from(lit))
144 } else {
145 SepMatcher::Glob(GlobPattern { segments, preserve })
146 };
147
148 Ok(SepPattern {
149 raw: SmolStr::from(raw),
150 compiled,
151 })
152}
153
154fn find_nth_unescaped(s: &str, target: u8, n: usize) -> Option<usize> {
156 let bytes = s.as_bytes();
157 let mut count = 0;
158 for i in 0..bytes.len() {
159 if bytes[i] == target && !is_escaped(bytes, i) {
160 count += 1;
161 if count == n {
162 return Some(i);
163 }
164 }
165 }
166 None
167}
168
169fn split_preserve(raw: &str) -> Result<(&str, Option<&str>), String> {
172 let bytes = raw.as_bytes();
173 let len = bytes.len();
174 if len == 0 || bytes[len - 1] != b')' {
175 return Ok((raw, None));
176 }
177 if is_escaped(bytes, len - 1) {
179 return Ok((raw, None));
180 }
181 let mut depth = 0i32;
183 let mut open_pos = None;
184 let mut i = len;
185 while i > 0 {
186 i -= 1;
187 if bytes[i] == b')' && !is_escaped(bytes, i) {
188 depth += 1;
189 } else if bytes[i] == b'(' && !is_escaped(bytes, i) {
190 depth -= 1;
191 if depth == 0 {
192 open_pos = Some(i);
193 break;
194 }
195 }
196 }
197 let open = match open_pos {
198 Some(p) => p,
199 None => return Ok((raw, None)), };
201
202 let main_part = &raw[..open];
211 {
212 let mb = main_part.as_bytes();
213 for j in 0..mb.len() {
214 if mb[j] == b'(' && !is_escaped(mb, j) {
215 return Err(fmt_err(
216 raw,
217 j,
218 "(...) must appear only at the end; found earlier '(' here",
219 ));
220 }
221 }
222 }
223
224 let preserve_content = &raw[open + 1..len - 1];
225 Ok((main_part, Some(preserve_content)))
226}
227
/// Reports whether the byte at `pos` is escaped, i.e. preceded by an odd number
/// of consecutive backslashes.
fn is_escaped(bytes: &[u8], pos: usize) -> bool {
    let backslashes = bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslashes % 2 == 1
}
242
243fn parse_segments(
247 raw: &str,
248 s: &str,
249 base_offset: usize,
250) -> Result<(Vec<GlobSegment>, usize), String> {
251 let mut segs = Vec::new();
252 let mut lit_buf = String::new();
253 let mut star_count = 0usize;
254 let bytes = s.as_bytes();
255 let len = bytes.len();
256 let mut i = 0;
257
258 while i < len {
259 let b = bytes[i];
260 if b == b'\\' && i + 1 < len {
261 let next = bytes[i + 1];
262 match next {
263 b'\\' | b'*' | b'?' | b'{' | b'}' | b'(' | b')' => {
264 lit_buf.push(next as char);
265 i += 2;
266 }
267 b'0' => {
268 lit_buf.push('\0');
269 i += 2;
270 }
271 b'n' => {
272 lit_buf.push('\n');
273 i += 2;
274 }
275 b't' => {
276 lit_buf.push('\t');
277 i += 2;
278 }
279 b'r' => {
280 lit_buf.push('\r');
281 i += 2;
282 }
283 b's' => {
284 flush_literal(&mut lit_buf, &mut segs);
285 segs.push(GlobSegment::Whitespace);
286 i += 2;
287 }
288 b'S' => {
289 flush_literal(&mut lit_buf, &mut segs);
290 segs.push(GlobSegment::NonWhitespace);
291 i += 2;
292 }
293 b'h' => {
294 flush_literal(&mut lit_buf, &mut segs);
295 segs.push(GlobSegment::HorizontalWhitespace);
296 i += 2;
297 }
298 b'H' => {
299 flush_literal(&mut lit_buf, &mut segs);
300 segs.push(GlobSegment::NonHorizontalWhitespace);
301 i += 2;
302 }
303 _ => {
304 lit_buf.push(next as char);
308 i += 2;
309 }
310 }
311 } else if b == b'*' {
312 flush_literal(&mut lit_buf, &mut segs);
313 segs.push(GlobSegment::Star);
314 star_count += 1;
315 if star_count > 1 {
316 return Err(fmt_err(
317 raw,
318 base_offset + i,
319 "at most one * allowed; use \\* to match a literal asterisk",
320 ));
321 }
322 i += 1;
323 } else if b == b'?' {
324 flush_literal(&mut lit_buf, &mut segs);
325 segs.push(GlobSegment::Any);
326 i += 1;
327 } else if b == b'(' || b == b')' {
328 return Err(fmt_err(
329 raw,
330 base_offset + i,
331 &format!(
332 "unexpected '{}'; (...) preserve must be at the end, use \\{} for literal",
333 b as char, b as char
334 ),
335 ));
336 } else {
337 let ch = s[i..].chars().next().unwrap();
339 lit_buf.push(ch);
340 i += ch.len_utf8();
341 }
342 }
343 flush_literal(&mut lit_buf, &mut segs);
344 Ok((segs, star_count))
345}
346
347fn flush_literal(buf: &mut String, segs: &mut Vec<GlobSegment>) {
348 if !buf.is_empty() {
349 segs.push(GlobSegment::Literal(SmolStr::from(buf.as_str())));
350 buf.clear();
351 }
352}
353
354impl SepPattern {
357 pub fn find(&self, haystack: &str) -> Option<(usize, SepMatch)> {
360 match &self.compiled {
361 SepMatcher::Literal(lit) => {
362 let pos = haystack.find(lit.as_str())?;
363 Some((
364 pos,
365 SepMatch {
366 consumed: lit.len(),
367 matched: lit.len(),
368 },
369 ))
370 }
371 SepMatcher::Glob(glob) => glob_find(glob, haystack),
372 }
373 }
374
375 pub fn match_at_start(&self, haystack: &str) -> Option<SepMatch> {
377 match &self.compiled {
378 SepMatcher::Literal(lit) => {
379 if haystack.starts_with(lit.as_str()) {
380 Some(SepMatch {
381 consumed: lit.len(),
382 matched: lit.len(),
383 })
384 } else {
385 None
386 }
387 }
388 SepMatcher::Glob(glob) => glob_match_at(glob, haystack, 0).map(|total| {
389 let main_len = try_match_segments(&glob.segments, haystack).unwrap_or(0);
390 let consumed = main_len;
391 SepMatch {
392 consumed,
393 matched: total,
394 }
395 }),
396 }
397 }
398
399 pub fn raw(&self) -> &str {
401 self.raw.as_str()
402 }
403}
404
405fn try_match_star_split(segments: &[GlobSegment], s: &str) -> Option<(usize, usize)> {
409 debug_assert!(matches!(segments.first(), Some(GlobSegment::Star)));
410 let remaining = &segments[1..];
411 if let Some(rest_len) = try_match_segments(remaining, s) {
413 return Some((0, rest_len));
414 }
415 let mut char_iter = s.char_indices();
416 while let Some((_, _)) = char_iter.next() {
417 let byte_pos = char_iter.clone().next().map(|(p, _)| p).unwrap_or(s.len());
418 let after = &s[byte_pos..];
419 if let Some(rest_len) = try_match_segments(remaining, after) {
420 return Some((byte_pos, rest_len));
421 }
422 }
423 None
424}
425
426fn glob_find(glob: &GlobPattern, haystack: &str) -> Option<(usize, SepMatch)> {
428 let segs = &glob.segments;
429 if segs.is_empty() {
430 if let Some(preserve) = &glob.preserve {
433 if let Some(GlobSegment::Literal(first_lit)) = preserve.first() {
435 let lit = first_lit.as_str();
436 let mut search_start = 0;
437 while search_start <= haystack.len() {
438 if let Some(pos) = haystack[search_start..].find(lit) {
439 let abs_pos = search_start + pos;
440 if let Some(plen) = try_match_segments(preserve, &haystack[abs_pos..]) {
441 return Some((
442 abs_pos,
443 SepMatch {
444 consumed: 0,
445 matched: plen,
446 },
447 ));
448 }
449 let next_char_len = haystack[abs_pos..]
450 .chars()
451 .next()
452 .map(|c| c.len_utf8())
453 .unwrap_or(1);
454 search_start = abs_pos + next_char_len;
455 } else {
456 break;
457 }
458 }
459 return None;
460 }
461 for (pos, _) in haystack.char_indices() {
463 if let Some(plen) = try_match_segments(preserve, &haystack[pos..]) {
464 return Some((
465 pos,
466 SepMatch {
467 consumed: 0,
468 matched: plen,
469 },
470 ));
471 }
472 }
473 return None;
474 }
475 return None;
476 }
477
478 if matches!(segs.first(), Some(GlobSegment::Star)) {
481 let (star_bytes, rest_bytes) = try_match_star_split(segs, haystack)?;
482 let preserve_bytes = if let Some(preserve) = &glob.preserve {
483 let after_main = &haystack[star_bytes + rest_bytes..];
484 try_match_segments(preserve, after_main)?
485 } else {
486 0
487 };
488 return Some((
489 star_bytes,
490 SepMatch {
491 consumed: rest_bytes,
492 matched: rest_bytes + preserve_bytes,
493 },
494 ));
495 }
496
497 if let Some(GlobSegment::Literal(first_lit)) = segs.first() {
499 let lit = first_lit.as_str();
500 let mut search_start = 0;
501 while search_start <= haystack.len() {
502 if let Some(pos) = haystack[search_start..].find(lit) {
503 let abs_pos = search_start + pos;
504 if let Some(total) = glob_match_at(glob, haystack, abs_pos) {
505 let main_len = try_match_segments(segs, &haystack[abs_pos..]).unwrap_or(0);
506 return Some((
507 abs_pos,
508 SepMatch {
509 consumed: main_len,
510 matched: total,
511 },
512 ));
513 }
514 let next_char_len = haystack[abs_pos..]
516 .chars()
517 .next()
518 .map(|c| c.len_utf8())
519 .unwrap_or(1);
520 search_start = abs_pos + next_char_len;
521 } else {
522 break;
523 }
524 }
525 return None;
526 }
527
528 for (pos, _) in haystack.char_indices() {
530 if let Some(total) = glob_match_at(glob, haystack, pos) {
531 let main_len = try_match_segments(segs, &haystack[pos..]).unwrap_or(0);
532 return Some((
533 pos,
534 SepMatch {
535 consumed: main_len,
536 matched: total,
537 },
538 ));
539 }
540 }
541 None
542}
543
544fn glob_match_at(glob: &GlobPattern, haystack: &str, start: usize) -> Option<usize> {
547 let s = &haystack[start..];
548 let main_len = try_match_segments(&glob.segments, s)?;
549 if let Some(preserve) = &glob.preserve {
550 let rest = &s[main_len..];
551 let plen = try_match_segments(preserve, rest)?;
552 Some(main_len + plen)
553 } else {
554 Some(main_len)
555 }
556}
557
558fn try_match_segments(segments: &[GlobSegment], s: &str) -> Option<usize> {
560 if segments.is_empty() {
561 return Some(0);
562 }
563 match &segments[0] {
564 GlobSegment::Literal(lit) => {
565 if s.starts_with(lit.as_str()) {
566 let rest = &s[lit.len()..];
567 let tail = try_match_segments(&segments[1..], rest)?;
568 Some(lit.len() + tail)
569 } else {
570 None
571 }
572 }
573 GlobSegment::Any => {
574 let ch = s.chars().next()?;
575 let clen = ch.len_utf8();
576 let rest = &s[clen..];
577 let tail = try_match_segments(&segments[1..], rest)?;
578 Some(clen + tail)
579 }
580 GlobSegment::Whitespace => {
581 match_char_class_backtrack(consume_whitespace, s, &segments[1..])
582 }
583 GlobSegment::NonWhitespace => {
584 match_char_class_backtrack(consume_non_whitespace, s, &segments[1..])
585 }
586 GlobSegment::HorizontalWhitespace => {
587 match_char_class_backtrack(consume_horizontal_whitespace, s, &segments[1..])
588 }
589 GlobSegment::NonHorizontalWhitespace => {
590 match_char_class_backtrack(consume_non_horizontal_whitespace, s, &segments[1..])
591 }
592 GlobSegment::Star => {
593 let remaining = &segments[1..];
595 let mut char_iter = s.char_indices();
596 if let Some(tail) = try_match_segments(remaining, s) {
598 return Some(tail);
599 }
600 while let Some((_, ch)) = char_iter.next() {
602 let byte_pos = char_iter.clone().next().map(|(p, _)| p).unwrap_or(s.len());
603 let after = &s[byte_pos..];
606 if let Some(tail) = try_match_segments(remaining, after) {
607 return Some(byte_pos + tail);
608 }
609 let _ = ch;
611 }
612 None
613 }
614 }
615}
616
617fn match_char_class_backtrack(
623 consume_fn: fn(&str) -> usize,
624 s: &str,
625 remaining: &[GlobSegment],
626) -> Option<usize> {
627 let max = consume_fn(s);
628 if max == 0 {
629 return None;
630 }
631 let rest = &s[max..];
633 if let Some(tail) = try_match_segments(remaining, rest) {
634 return Some(max + tail);
635 }
636 let consumed_slice = &s[..max];
639 let mut pos = max;
640 for (i, _) in consumed_slice.char_indices().rev() {
641 pos = i;
643 if pos == 0 {
644 break; }
646 let rest = &s[pos..];
647 if let Some(tail) = try_match_segments(remaining, rest) {
648 return Some(pos + tail);
649 }
650 }
651 let _ = pos;
652 None
653}
654
/// Returns the byte length of the leading run of space, tab, CR, or LF in `s`.
fn consume_whitespace(s: &str) -> usize {
    s.chars()
        .take_while(|ch| matches!(ch, ' ' | '\t' | '\r' | '\n'))
        .map(char::len_utf8)
        .sum()
}
666
/// Returns the byte length of the leading run of characters in `s` that are
/// none of space, tab, CR, or LF.
fn consume_non_whitespace(s: &str) -> usize {
    s.chars()
        .take_while(|ch| !matches!(ch, ' ' | '\t' | '\r' | '\n'))
        .map(char::len_utf8)
        .sum()
}
678
/// Returns the byte length of the leading run of spaces and tabs in `s`.
fn consume_horizontal_whitespace(s: &str) -> usize {
    s.chars()
        .take_while(|ch| matches!(ch, ' ' | '\t'))
        .map(char::len_utf8)
        .sum()
}
690
/// Returns the byte length of the leading run of characters in `s` that are
/// neither space nor tab. Line breaks count as part of the run.
fn consume_non_horizontal_whitespace(s: &str) -> usize {
    s.chars()
        .take_while(|ch| !matches!(ch, ' ' | '\t'))
        .map(char::len_utf8)
        .sum()
}
702
703impl Serialize for SepPattern {
706 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
707 where
708 S: Serializer,
709 {
710 serializer.serialize_str(self.raw.as_str())
711 }
712}
713
714impl<'de> Deserialize<'de> for SepPattern {
715 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
716 where
717 D: Deserializer<'de>,
718 {
719 let s = String::deserialize(deserializer)?;
720 build_pattern(&s).map_err(serde::de::Error::custom)
721 }
722}
723
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Parsing: literals and escapes ----

    #[test]
    fn test_parse_literal() {
        let p = build_pattern("abc").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("abc".into()));
    }

    #[test]
    fn test_parse_literal_with_newline() {
        let p = build_pattern("ab\\n").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("ab\n".into()));
    }

    #[test]
    fn test_parse_literal_with_null() {
        let p = build_pattern("ab\\0").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("ab\0".into()));
    }

    #[test]
    fn test_parse_literal_with_tab() {
        let p = build_pattern("ab\\t").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("ab\t".into()));
    }

    #[test]
    fn test_parse_literal_with_cr() {
        let p = build_pattern("ab\\r").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("ab\r".into()));
    }

    #[test]
    fn test_parse_escaped_chars() {
        let p = build_pattern("a\\*b\\?c").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("a*b?c".into()));
    }

    #[test]
    fn test_parse_escaped_braces() {
        let p = build_pattern("a\\{b\\}c").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("a{b}c".into()));
    }

    #[test]
    fn test_parse_escaped_parens() {
        let p = build_pattern("a\\(b\\)").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("a(b)".into()));
    }

    // ---- Parsing: wildcards and character classes ----

    #[test]
    fn test_parse_glob_star_eq() {
        let p = build_pattern("*=").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 2);
                assert_eq!(g.segments[0], GlobSegment::Star);
                assert_eq!(g.segments[1], GlobSegment::Literal("=".into()));
                assert!(g.preserve.is_none());
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_glob_key_star() {
        let p = build_pattern("key=*").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 2);
                assert_eq!(g.segments[0], GlobSegment::Literal("key=".into()));
                assert_eq!(g.segments[1], GlobSegment::Star);
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_glob_field_any() {
        let p = build_pattern("field?:").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 3);
                assert_eq!(g.segments[0], GlobSegment::Literal("field".into()));
                assert_eq!(g.segments[1], GlobSegment::Any);
                assert_eq!(g.segments[2], GlobSegment::Literal(":".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_whitespace() {
        let p = build_pattern("\\s=").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 2);
                assert_eq!(g.segments[0], GlobSegment::Whitespace);
                assert_eq!(g.segments[1], GlobSegment::Literal("=".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_horizontal_whitespace() {
        let p = build_pattern("\\h:\\h").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 3);
                assert_eq!(g.segments[0], GlobSegment::HorizontalWhitespace);
                assert_eq!(g.segments[1], GlobSegment::Literal(":".into()));
                assert_eq!(g.segments[2], GlobSegment::HorizontalWhitespace);
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_non_whitespace() {
        let p = build_pattern("\\s\\S=").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 3);
                assert_eq!(g.segments[0], GlobSegment::Whitespace);
                assert_eq!(g.segments[1], GlobSegment::NonWhitespace);
                assert_eq!(g.segments[2], GlobSegment::Literal("=".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_non_horizontal_whitespace() {
        let p = build_pattern("\\h\\H:\\H").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments.len(), 4);
                assert_eq!(g.segments[0], GlobSegment::HorizontalWhitespace);
                assert_eq!(g.segments[1], GlobSegment::NonHorizontalWhitespace);
                assert_eq!(g.segments[2], GlobSegment::Literal(":".into()));
                assert_eq!(g.segments[3], GlobSegment::NonHorizontalWhitespace);
            }
            _ => panic!("expected Glob"),
        }
    }

    // ---- Parsing: preserve groups ----

    #[test]
    fn test_parse_preserve() {
        let p = build_pattern("*(key=)").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments, vec![GlobSegment::Star]);
                let preserve = g.preserve.as_ref().unwrap();
                assert_eq!(preserve.len(), 1);
                assert_eq!(preserve[0], GlobSegment::Literal("key=".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_parse_preserve_with_whitespace() {
        let p = build_pattern("*\\s(next)").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments, vec![GlobSegment::Star, GlobSegment::Whitespace]);
                let preserve = g.preserve.as_ref().unwrap();
                assert_eq!(preserve.len(), 1);
                assert_eq!(preserve[0], GlobSegment::Literal("next".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    // ---- Parsing: errors ----

    #[test]
    fn test_err_multi_star() {
        let e = build_pattern("*a*").unwrap_err();
        assert!(e.contains("at most one * allowed"), "got: {}", e);
        assert!(
            e.contains("{*a*}"),
            "should show the full pattern, got: {}",
            e
        );
        assert!(e.contains("^"), "should have a pointer, got: {}", e);
    }

    #[test]
    fn test_err_preserve_not_end() {
        let e = build_pattern("(key)*=").unwrap_err();
        assert!(
            e.contains("(...)") || e.contains("preserve") || e.contains("unexpected '('"),
            "got: {}",
            e
        );
    }

    #[test]
    fn test_parse_star_in_preserve() {
        // The one-star limit applies to the main part and the preserve body separately.
        let p = build_pattern("*(c*=)").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert_eq!(g.segments, vec![GlobSegment::Star]);
                let preserve = g.preserve.as_ref().unwrap();
                assert_eq!(preserve.len(), 3);
                assert_eq!(preserve[0], GlobSegment::Literal("c".into()));
                assert_eq!(preserve[1], GlobSegment::Star);
                assert_eq!(preserve[2], GlobSegment::Literal("=".into()));
            }
            _ => panic!("expected Glob"),
        }
    }

    #[test]
    fn test_err_empty() {
        let e = build_pattern("").unwrap_err();
        assert!(e.contains("empty"), "got: {}", e);
    }

    #[test]
    fn test_unknown_escape_as_literal() {
        let p = build_pattern("ab\\x").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("abx".into()));

        let p = build_pattern("field\\:=").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("field:=".into()));

        let p = build_pattern("\\z").unwrap();
        assert_eq!(p.compiled, SepMatcher::Literal("z".into()));
    }

    #[test]
    fn test_err_visual_pointer_position() {
        let e = build_pattern("*a*").unwrap_err();
        let lines: Vec<&str> = e.lines().collect();
        assert!(lines.len() >= 3, "expected 3 lines, got: {}", e);
        assert!(lines[1].contains("{*a*}"), "got line1: {}", lines[1]);
        let pointer_line = lines[2];
        let caret_pos = pointer_line.find('^').expect("no ^ found");
        // The caret must sit under the second `*`, three columns right of the
        // `{` on the display line, whatever indent the two lines share.
        let brace_pos = lines[1].find('{').expect("no { found");
        assert_eq!(
            caret_pos,
            brace_pos + 3,
            "caret at wrong position in: {:?}",
            pointer_line
        );
    }

    #[test]
    fn test_err_messages_display() {
        // Prints each message for visual inspection; only checks that they are errors.
        let cases = vec![
            ("", "empty pattern"),
            ("*a*", "multiple stars"),
            ("(key)*=", "preserve not at end"),
            ("test(mid)abc", "paren not at end"),
        ];
        for (input, label) in cases {
            let err = build_pattern(input).unwrap_err();
            println!("--- {} ---\n{}\n", label, err);
        }
    }

    // ---- Matching ----

    #[test]
    fn test_match_literal() {
        let p = build_pattern("abc").unwrap();
        let (off, m) = p.find("xyzabcdef").unwrap();
        assert_eq!(off, 3);
        assert_eq!(m.consumed, 3);
        assert_eq!(m.matched, 3);
    }

    #[test]
    fn test_match_literal_no_match() {
        let p = build_pattern("abc").unwrap();
        assert!(p.find("xyzdef").is_none());
    }

    #[test]
    fn test_match_star_eq_non_greedy() {
        let p = build_pattern("*=").unwrap();
        // The star stops at the first '='.
        let (off, m) = p.find("a=b=c").unwrap();
        assert_eq!(off, 1);
        assert_eq!(m.consumed, 1);
        assert_eq!(m.matched, 1);
    }

    #[test]
    fn test_match_whitespace_eq() {
        let p = build_pattern("\\s=").unwrap();
        // Two spaces plus '=' give the asserted length of 3.
        let (off, m) = p.find("key  =val").unwrap();
        assert_eq!(off, 3);
        assert_eq!(m.consumed, 3);
        assert_eq!(m.matched, 3);
    }

    #[test]
    fn test_match_preserve() {
        let p = build_pattern("*\\s(key=)").unwrap();
        // Star absorbs "hello", `\s` consumes two spaces, "key=" is preserved.
        let (off, m) = p.find("hello  key=value").unwrap();
        assert_eq!(off, 5);
        assert_eq!(m.consumed, 2);
        assert_eq!(m.matched, 6);
    }

    #[test]
    fn test_match_field_any() {
        let p = build_pattern("field?:").unwrap();
        let (off, m) = p.find("fieldA:value").unwrap();
        assert_eq!(off, 0);
        assert_eq!(m.consumed, 7);
        assert_eq!(m.matched, 7);
    }

    #[test]
    fn test_match_horizontal_whitespace() {
        let p = build_pattern("\\h:\\h").unwrap();
        let (off, m) = p.find("key\t:\tval").unwrap();
        assert_eq!(off, 3);
        assert_eq!(m.consumed, 3);
        assert_eq!(m.matched, 3);
    }

    #[test]
    fn test_match_non_whitespace() {
        let p = build_pattern("\\s\\S=").unwrap();
        // " message" has no '=' after the word, so the match is " externalId=".
        let (off, m) = p.find("msg=Test message externalId=0").unwrap();
        assert_eq!(off, 16);
        assert_eq!(m.consumed, 12);
        assert_eq!(m.matched, 12);
    }

    #[test]
    fn test_match_non_whitespace_preserve_kvarr() {
        let p = build_pattern("\\s(\\S=)").unwrap();
        // Only the space is consumed; "externalId=" is preserved.
        let (off, m) = p.find("msg=Test message externalId=0").unwrap();
        assert_eq!(off, 16);
        assert_eq!(m.consumed, 1);
        assert_eq!(m.matched, 12);
    }

    #[test]
    fn test_match_non_horizontal_whitespace() {
        let p = build_pattern("\\H=").unwrap();
        let (off, m) = p.find("key\t:\tval\texternalId=0").unwrap();
        assert_eq!(off, 10);
        assert_eq!(m.consumed, 11);
    }

    #[test]
    fn test_match_no_match() {
        let p = build_pattern("\\s=").unwrap();
        assert!(p.find("key=val").is_none());
    }

    #[test]
    fn test_match_at_start_literal() {
        let p = build_pattern("abc").unwrap();
        let m = p.match_at_start("abcdef").unwrap();
        assert_eq!(m.consumed, 3);
        assert!(p.match_at_start("xabc").is_none());
    }

    #[test]
    fn test_match_at_start_glob() {
        let p = build_pattern("\\s=").unwrap();
        // Two spaces plus '=' give the asserted length of 3.
        let m = p.match_at_start("  =val").unwrap();
        assert_eq!(m.consumed, 3);
        assert!(p.match_at_start("val =").is_none());
    }

    #[test]
    fn test_match_star_at_end() {
        let p = build_pattern("key=*").unwrap();
        // A trailing star is non-greedy and matches nothing.
        let (off, m) = p.find("key=value").unwrap();
        assert_eq!(off, 0);
        assert_eq!(m.consumed, 4);
        assert_eq!(m.matched, 4);
    }

    #[test]
    fn test_match_star_newline() {
        let p = build_pattern("\\s=*\\n").unwrap();
        // 2 spaces + '=' + "hello" + '\n' = 9 bytes.
        let (off, m) = p.find("  =hello\n").unwrap();
        assert_eq!(off, 0);
        assert_eq!(m.consumed, 9);
    }

    #[test]
    fn test_match_preserve_only() {
        let p = build_pattern("(abc)").unwrap();
        match &p.compiled {
            SepMatcher::Glob(g) => {
                assert!(g.segments.is_empty());
                assert!(g.preserve.is_some());
            }
            _ => panic!("expected Glob"),
        }

        let (off, m) = p.find("abcdef").unwrap();
        assert_eq!(off, 0);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 3);

        let (off, m) = p.find("xyzabcdef").unwrap();
        assert_eq!(off, 3);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 3);

        assert!(p.find("xyzdef").is_none());
    }

    #[test]
    fn test_match_preserve_only_command() {
        let p = build_pattern("(command=)").unwrap();

        let (off, m) = p.find("hello command=value").unwrap();
        assert_eq!(off, 6);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 8);

        let (off, m) = p.find("command=value").unwrap();
        assert_eq!(off, 0);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 8);
    }

    #[test]
    fn test_match_preserve_with_star() {
        let p = build_pattern("(c*=)").unwrap();

        let (off, m) = p.find("hello cmd=value").unwrap();
        assert_eq!(off, 6);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 4);

        // The first candidate wins: "cat=" rather than "cmd=".
        let (off, m) = p.find("hello cat=1 cmd=2").unwrap();
        assert_eq!(off, 6);
        assert_eq!(m.consumed, 0);
        assert_eq!(m.matched, 4);
    }

    // ---- Serde ----

    #[test]
    fn test_serde_roundtrip() {
        let p = build_pattern("*\\s(key=)").unwrap();
        let json = serde_json::to_string(&p).unwrap();
        assert_eq!(json, r#""*\\s(key=)""#);
        let p2: SepPattern = serde_json::from_str(&json).unwrap();
        assert_eq!(p.raw, p2.raw);
        assert_eq!(p.compiled, p2.compiled);
    }

    #[test]
    fn test_serde_roundtrip_literal() {
        let p = build_pattern("abc").unwrap();
        let json = serde_json::to_string(&p).unwrap();
        let p2: SepPattern = serde_json::from_str(&json).unwrap();
        assert_eq!(p, p2);
    }
}