use serde::{Deserialize, Deserializer, Serialize, Serializer};
use smol_str::SmolStr;
/// Renders a three-line diagnostic for a malformed separator pattern.
///
/// Line one carries `msg`, line two shows `raw` wrapped in literal braces,
/// and line three places a `^` under the character that starts at byte
/// offset `pos` of `raw`.
///
/// `pos` is a byte offset, but the caret column is derived from the number
/// of characters before it, so multi-byte UTF-8 characters in front of the
/// error do not push the caret too far to the right.
fn fmt_err(raw: &str, pos: usize, msg: &str) -> String {
    let display = format!("{{{}}}", raw);
    // Count characters (not bytes) that start before `pos`.
    let chars_before = raw
        .char_indices()
        .take_while(|&(start, _)| start < pos)
        .count();
    // The extra column accounts for the `{` that opens `display`.
    let pointer_line = format!("{}^", " ".repeat(chars_before + 1));
    format!(
        "sep pattern error: {}\n {}\n {}",
        msg, display, pointer_line
    )
}
/// Formats a pattern error that has no specific position to point at.
///
/// Produces a single line: the fixed `sep pattern error:` prefix, then `msg`,
/// then the offending pattern `raw` wrapped in literal braces.
fn fmt_err_no_pos(raw: &str, msg: &str) -> String {
format!("sep pattern error: {} in {{{}}}", msg, raw)
}
/// Lengths, in bytes, reported for one successful separator match.
#[derive(Debug, Clone, PartialEq)]
pub struct SepMatch {
/// Bytes matched by the main (non-preserve) segments. When `find` handles a
/// pattern whose first segment is `*`, only the part after the star is counted.
pub consumed: usize,
/// `consumed` plus the bytes matched by the trailing `(...)` preserve group;
/// equal to `consumed` when the pattern has no preserve group.
pub matched: usize,
}
/// One element of a compiled glob pattern.
#[derive(Debug, Clone, PartialEq)]
pub enum GlobSegment {
/// Text that must appear verbatim.
Literal(SmolStr),
/// `*`: zero or more characters; the matcher tries the shortest span first.
Star,
/// `?`: exactly one character.
Any,
/// `\s`: one or more of space, tab, carriage return, or line feed.
Whitespace,
/// `\S`: one or more characters that are none of space, tab, CR, LF.
NonWhitespace,
/// `\h`: one or more spaces or tabs.
HorizontalWhitespace,
/// `\H`: one or more characters that are neither space nor tab.
NonHorizontalWhitespace,
}
/// A compiled glob: the main segments plus an optional preserve group.
#[derive(Debug, Clone, PartialEq)]
pub struct GlobPattern {
/// Segments parsed from the part of the pattern before any trailing `(...)`.
pub segments: Vec<GlobSegment>,
/// Segments parsed from the trailing `(...)` group, if the pattern has one.
/// They must match right after the main segments and are counted in
/// `SepMatch::matched` but not in `SepMatch::consumed`.
pub preserve: Option<Vec<GlobSegment>>,
}
/// Compiled form of a separator pattern.
#[derive(Debug, Clone, PartialEq)]
pub enum SepMatcher {
/// Pattern without wildcards or a preserve group; matched by substring search.
Literal(SmolStr),
/// Pattern containing wildcards, character classes, or a preserve group.
Glob(GlobPattern),
}
/// A separator pattern: the source text together with its compiled matcher.
#[derive(Debug, Clone, PartialEq)]
pub struct SepPattern {
/// Pattern text exactly as passed to `build_pattern`; this is what gets serialized.
pub(crate) raw: SmolStr,
/// Matcher produced by `build_pattern` from `raw`.
pub(crate) compiled: SepMatcher,
}
pub fn build_pattern(raw: &str) -> Result<SepPattern, String> {
if raw.is_empty() {
return Err("sep pattern error: pattern is empty, expected content inside {}".to_string());
}
let (main_raw, preserve_raw) = split_preserve(raw)?;
let main_offset = 0;
let (segments, star_count) = parse_segments(raw, main_raw, main_offset)?;
let preserve = if let Some(pr) = preserve_raw {
let preserve_offset = main_raw.len() + 1; let (psegs, _) = parse_segments(raw, pr, preserve_offset)?;
Some(psegs)
} else {
None
};
if star_count > 1 {
let second_star_pos = find_nth_unescaped(raw, b'*', 2).unwrap_or(raw.len() - 1);
return Err(fmt_err(raw, second_star_pos, "at most one * allowed"));
}
if segments.is_empty() && preserve.as_ref().is_none_or(|p| p.is_empty()) {
return Err(fmt_err_no_pos(
raw,
"pattern resolves to empty after parsing",
));
}
let has_wildcard = segments.iter().any(|s| {
matches!(
s,
GlobSegment::Star
| GlobSegment::Any
| GlobSegment::Whitespace
| GlobSegment::NonWhitespace
| GlobSegment::HorizontalWhitespace
| GlobSegment::NonHorizontalWhitespace
)
});
let compiled = if !has_wildcard && preserve.is_none() {
let lit: String = segments
.iter()
.map(|s| match s {
GlobSegment::Literal(l) => l.as_str(),
_ => unreachable!(),
})
.collect();
SepMatcher::Literal(SmolStr::from(lit))
} else {
SepMatcher::Glob(GlobPattern { segments, preserve })
};
Ok(SepPattern {
raw: SmolStr::from(raw),
compiled,
})
}
/// Returns the byte index of the `n`-th (1-based) occurrence of `target` in
/// `s` that is not escaped by a backslash, or `None` if there are fewer than
/// `n` such occurrences (which includes `n == 0`).
fn find_nth_unescaped(s: &str, target: u8, n: usize) -> Option<usize> {
    let bytes = s.as_bytes();
    // `nth` is zero-based; bail out for n == 0 instead of underflowing.
    let skip = n.checked_sub(1)?;
    bytes
        .iter()
        .enumerate()
        .filter(|&(idx, &byte)| byte == target && !is_escaped(bytes, idx))
        .map(|(idx, _)| idx)
        .nth(skip)
}
/// Splits a pattern into its main part and the content of a trailing
/// `(...)` preserve group.
///
/// Returns `(raw, None)` when the pattern does not end in an unescaped `)`
/// or when no matching unescaped `(` is found. Otherwise returns the text
/// before the `(` and the text between the parentheses.
///
/// # Errors
///
/// Fails when the main part still contains an unescaped `(`.
fn split_preserve(raw: &str) -> Result<(&str, Option<&str>), String> {
    let bytes = raw.as_bytes();
    let Some(last) = bytes.len().checked_sub(1) else {
        return Ok((raw, None));
    };
    if bytes[last] != b')' || is_escaped(bytes, last) {
        return Ok((raw, None));
    }

    // Walk backwards from the closing paren until the nesting depth returns to zero.
    let mut depth = 0i32;
    let mut open_pos = None;
    for idx in (0..=last).rev() {
        match bytes[idx] {
            b')' if !is_escaped(bytes, idx) => depth += 1,
            b'(' if !is_escaped(bytes, idx) => {
                depth -= 1;
                if depth == 0 {
                    open_pos = Some(idx);
                    break;
                }
            }
            _ => {}
        }
    }
    let Some(open) = open_pos else {
        return Ok((raw, None));
    };

    let main_part = &raw[..open];
    let main_bytes = main_part.as_bytes();
    let stray_open =
        (0..main_bytes.len()).find(|&j| main_bytes[j] == b'(' && !is_escaped(main_bytes, j));
    if let Some(j) = stray_open {
        return Err(fmt_err(
            raw,
            j,
            "(...) must appear only at the end; found earlier '(' here",
        ));
    }

    Ok((main_part, Some(&raw[open + 1..last])))
}
/// Reports whether the byte at `pos` is escaped, i.e. directly preceded by
/// an odd number of consecutive backslashes.
fn is_escaped(bytes: &[u8], pos: usize) -> bool {
    let backslashes = bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslashes % 2 == 1
}
/// Parses one part of a pattern (`s`) into glob segments.
///
/// `raw` is the complete pattern and `base_offset` is the byte offset of `s`
/// inside it; both are used only to render error messages.
///
/// Syntax handled here:
/// * `*` becomes [`GlobSegment::Star`] and `?` becomes [`GlobSegment::Any`];
/// * `\s`, `\S`, `\h`, `\H` become the corresponding character-class segments;
/// * `\0`, `\n`, `\t`, `\r` become NUL, LF, TAB and CR inside literal text;
/// * a backslash before any other character yields that character literally;
/// * a lone backslash at the very end is kept as a literal backslash.
///
/// Returns the segments together with the number of `*` seen.
///
/// # Errors
///
/// Fails on a second unescaped `*` and on any unescaped `(` or `)`.
fn parse_segments(
    raw: &str,
    s: &str,
    base_offset: usize,
) -> Result<(Vec<GlobSegment>, usize), String> {
    let mut segs = Vec::new();
    let mut lit_buf = String::new();
    let mut star_count = 0usize;
    let bytes = s.as_bytes();
    let len = bytes.len();
    // Invariant: `i` always sits on a UTF-8 character boundary of `s`.
    let mut i = 0;
    while i < len {
        let b = bytes[i];
        if b == b'\\' && i + 1 < len {
            // Decode the whole escaped character. Looking at a single byte would
            // split a multi-byte character and leave `i` off a char boundary,
            // which makes the slice in the literal branch below panic.
            let escaped = s[i + 1..]
                .chars()
                .next()
                .expect("i + 1 < len, so a character follows the backslash");
            let class = match escaped {
                's' => Some(GlobSegment::Whitespace),
                'S' => Some(GlobSegment::NonWhitespace),
                'h' => Some(GlobSegment::HorizontalWhitespace),
                'H' => Some(GlobSegment::NonHorizontalWhitespace),
                _ => None,
            };
            if let Some(segment) = class {
                flush_literal(&mut lit_buf, &mut segs);
                segs.push(segment);
            } else {
                lit_buf.push(match escaped {
                    '0' => '\0',
                    'n' => '\n',
                    't' => '\t',
                    'r' => '\r',
                    // Metacharacters and unknown escapes stand for themselves.
                    other => other,
                });
            }
            i += 1 + escaped.len_utf8();
        } else if b == b'*' {
            flush_literal(&mut lit_buf, &mut segs);
            segs.push(GlobSegment::Star);
            star_count += 1;
            if star_count > 1 {
                return Err(fmt_err(
                    raw,
                    base_offset + i,
                    "at most one * allowed; use \\* to match a literal asterisk",
                ));
            }
            i += 1;
        } else if b == b'?' {
            flush_literal(&mut lit_buf, &mut segs);
            segs.push(GlobSegment::Any);
            i += 1;
        } else if b == b'(' || b == b')' {
            return Err(fmt_err(
                raw,
                base_offset + i,
                &format!(
                    "unexpected '{}'; (...) preserve must be at the end, use \\{} for literal",
                    b as char, b as char
                ),
            ));
        } else {
            // Plain text (including a trailing lone backslash): copy one full character.
            let ch = s[i..].chars().next().unwrap();
            lit_buf.push(ch);
            i += ch.len_utf8();
        }
    }
    flush_literal(&mut lit_buf, &mut segs);
    Ok((segs, star_count))
}
/// Moves any pending literal text from `buf` into `segs` as a
/// [`GlobSegment::Literal`] and empties `buf`. Does nothing when `buf` is empty.
fn flush_literal(buf: &mut String, segs: &mut Vec<GlobSegment>) {
    if buf.is_empty() {
        return;
    }
    let literal = SmolStr::from(buf.as_str());
    segs.push(GlobSegment::Literal(literal));
    buf.clear();
}
impl SepPattern {
    /// Searches `haystack` for the first match of this pattern.
    ///
    /// Returns the byte offset of the match together with its lengths, or
    /// `None` when the pattern does not match anywhere.
    pub fn find(&self, haystack: &str) -> Option<(usize, SepMatch)> {
        let lit = match &self.compiled {
            SepMatcher::Glob(glob) => return glob_find(glob, haystack),
            SepMatcher::Literal(lit) => lit,
        };
        let width = lit.len();
        haystack.find(lit.as_str()).map(|pos| {
            (
                pos,
                SepMatch {
                    consumed: width,
                    matched: width,
                },
            )
        })
    }

    /// Matches this pattern at the very beginning of `haystack`.
    ///
    /// Returns the match lengths, or `None` when `haystack` does not start
    /// with a match.
    pub fn match_at_start(&self, haystack: &str) -> Option<SepMatch> {
        match &self.compiled {
            SepMatcher::Literal(lit) => haystack.starts_with(lit.as_str()).then(|| SepMatch {
                consumed: lit.len(),
                matched: lit.len(),
            }),
            SepMatcher::Glob(glob) => {
                let total = glob_match_at(glob, haystack, 0)?;
                // Length of the main segments alone, without the preserve group.
                let consumed = try_match_segments(&glob.segments, haystack).unwrap_or(0);
                Some(SepMatch {
                    consumed,
                    matched: total,
                })
            }
        }
    }

    /// Returns the pattern text this value was built from.
    pub fn raw(&self) -> &str {
        self.raw.as_str()
    }
}
/// For a segment list that starts with `*`, finds the shortest prefix of `s`
/// the star can absorb so that the remaining segments match right after it.
///
/// Returns `(bytes absorbed by the star, bytes matched by the remaining
/// segments)`, or `None` when no split works.
fn try_match_star_split(segments: &[GlobSegment], s: &str) -> Option<(usize, usize)> {
    debug_assert!(matches!(segments.first(), Some(GlobSegment::Star)));
    let tail = &segments[1..];
    // Candidate split points: the start of `s`, then the end of every character.
    std::iter::once(0)
        .chain(s.char_indices().map(|(start, ch)| start + ch.len_utf8()))
        .find_map(|split| {
            try_match_segments(tail, &s[split..]).map(|rest_len| (split, rest_len))
        })
}
fn glob_find(glob: &GlobPattern, haystack: &str) -> Option<(usize, SepMatch)> {
let segs = &glob.segments;
if segs.is_empty() {
if let Some(preserve) = &glob.preserve {
if let Some(GlobSegment::Literal(first_lit)) = preserve.first() {
let lit = first_lit.as_str();
let mut search_start = 0;
while search_start <= haystack.len() {
if let Some(pos) = haystack[search_start..].find(lit) {
let abs_pos = search_start + pos;
if let Some(plen) = try_match_segments(preserve, &haystack[abs_pos..]) {
return Some((
abs_pos,
SepMatch {
consumed: 0,
matched: plen,
},
));
}
let next_char_len = haystack[abs_pos..]
.chars()
.next()
.map(|c| c.len_utf8())
.unwrap_or(1);
search_start = abs_pos + next_char_len;
} else {
break;
}
}
return None;
}
for (pos, _) in haystack.char_indices() {
if let Some(plen) = try_match_segments(preserve, &haystack[pos..]) {
return Some((
pos,
SepMatch {
consumed: 0,
matched: plen,
},
));
}
}
return None;
}
return None;
}
if matches!(segs.first(), Some(GlobSegment::Star)) {
let (star_bytes, rest_bytes) = try_match_star_split(segs, haystack)?;
let preserve_bytes = if let Some(preserve) = &glob.preserve {
let after_main = &haystack[star_bytes + rest_bytes..];
try_match_segments(preserve, after_main)?
} else {
0
};
return Some((
star_bytes,
SepMatch {
consumed: rest_bytes,
matched: rest_bytes + preserve_bytes,
},
));
}
if let Some(GlobSegment::Literal(first_lit)) = segs.first() {
let lit = first_lit.as_str();
let mut search_start = 0;
while search_start <= haystack.len() {
if let Some(pos) = haystack[search_start..].find(lit) {
let abs_pos = search_start + pos;
if let Some(total) = glob_match_at(glob, haystack, abs_pos) {
let main_len = try_match_segments(segs, &haystack[abs_pos..]).unwrap_or(0);
return Some((
abs_pos,
SepMatch {
consumed: main_len,
matched: total,
},
));
}
let next_char_len = haystack[abs_pos..]
.chars()
.next()
.map(|c| c.len_utf8())
.unwrap_or(1);
search_start = abs_pos + next_char_len;
} else {
break;
}
}
return None;
}
for (pos, _) in haystack.char_indices() {
if let Some(total) = glob_match_at(glob, haystack, pos) {
let main_len = try_match_segments(segs, &haystack[pos..]).unwrap_or(0);
return Some((
pos,
SepMatch {
consumed: main_len,
matched: total,
},
));
}
}
None
}
/// Matches `glob` against `haystack` starting exactly at byte offset `start`.
///
/// Returns the total number of bytes matched (main segments plus the
/// preserve group, when present), or `None` if either part fails to match.
fn glob_match_at(glob: &GlobPattern, haystack: &str, start: usize) -> Option<usize> {
    let window = &haystack[start..];
    let main_len = try_match_segments(&glob.segments, window)?;
    match glob.preserve.as_deref() {
        None => Some(main_len),
        Some(preserve) => {
            try_match_segments(preserve, &window[main_len..]).map(|plen| main_len + plen)
        }
    }
}
/// Matches `segments` in order against the beginning of `s`.
///
/// Returns the number of bytes of `s` covered by the whole segment list, or
/// `None` when the list cannot be matched. An empty list matches zero bytes.
fn try_match_segments(segments: &[GlobSegment], s: &str) -> Option<usize> {
    let Some((head, tail)) = segments.split_first() else {
        return Some(0);
    };
    match head {
        GlobSegment::Literal(lit) => {
            let rest = s.strip_prefix(lit.as_str())?;
            try_match_segments(tail, rest).map(|n| lit.len() + n)
        }
        GlobSegment::Any => {
            let width = s.chars().next()?.len_utf8();
            try_match_segments(tail, &s[width..]).map(|n| width + n)
        }
        GlobSegment::Whitespace => match_char_class_backtrack(consume_whitespace, s, tail),
        GlobSegment::NonWhitespace => match_char_class_backtrack(consume_non_whitespace, s, tail),
        GlobSegment::HorizontalWhitespace => {
            match_char_class_backtrack(consume_horizontal_whitespace, s, tail)
        }
        GlobSegment::NonHorizontalWhitespace => {
            match_char_class_backtrack(consume_non_horizontal_whitespace, s, tail)
        }
        // Shortest-first: let the star absorb nothing, then one more character
        // at a time, until the rest of the list matches.
        GlobSegment::Star => std::iter::once(0)
            .chain(s.char_indices().map(|(start, ch)| start + ch.len_utf8()))
            .find_map(|skip| try_match_segments(tail, &s[skip..]).map(|n| skip + n)),
    }
}
/// Matches a "one or more" character class followed by `remaining`.
///
/// `consume_fn` reports how many leading bytes of `s` belong to the class.
/// The longest run is tried first; if `remaining` does not match after it,
/// the run is shortened one character at a time, but never below one
/// character. Returns the bytes covered by the class plus `remaining`.
fn match_char_class_backtrack(
    consume_fn: fn(&str) -> usize,
    s: &str,
    remaining: &[GlobSegment],
) -> Option<usize> {
    let longest = consume_fn(s);
    if longest == 0 {
        return None;
    }
    // Shorter cut points, from the start of the last class character down to
    // the start of the second one; offset 0 would leave the class empty.
    let shorter_cuts = s[..longest]
        .char_indices()
        .rev()
        .map(|(start, _)| start)
        .filter(|&start| start > 0);
    std::iter::once(longest)
        .chain(shorter_cuts)
        .find_map(|cut| try_match_segments(remaining, &s[cut..]).map(|tail| cut + tail))
}
/// Returns the byte length of the leading run of space, tab, carriage-return
/// and line-feed characters in `s`.
fn consume_whitespace(s: &str) -> usize {
    s.find(|ch: char| !matches!(ch, ' ' | '\t' | '\r' | '\n'))
        .unwrap_or(s.len())
}
/// Returns the byte length of the leading run of characters in `s` that are
/// none of space, tab, carriage return, or line feed.
fn consume_non_whitespace(s: &str) -> usize {
    s.find(|ch: char| matches!(ch, ' ' | '\t' | '\r' | '\n'))
        .unwrap_or(s.len())
}
/// Returns the byte length of the leading run of spaces and tabs in `s`.
fn consume_horizontal_whitespace(s: &str) -> usize {
    s.find(|ch: char| !matches!(ch, ' ' | '\t'))
        .unwrap_or(s.len())
}
/// Returns the byte length of the leading run of characters in `s` that are
/// neither a space nor a tab.
fn consume_non_horizontal_whitespace(s: &str) -> usize {
    s.find(|ch: char| matches!(ch, ' ' | '\t'))
        .unwrap_or(s.len())
}
impl Serialize for SepPattern {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.raw.as_str())
}
}
impl<'de> Deserialize<'de> for SepPattern {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
build_pattern(&s).map_err(serde::de::Error::custom)
}
}
// Unit tests covering pattern parsing, error rendering, matching via `find` /
// `match_at_start`, and serde round-trips.
#[cfg(test)]
mod tests {
use super::*;
// ---- parsing: patterns that compile to a plain literal ----
#[test]
fn test_parse_literal() {
let p = build_pattern("abc").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("abc".into()));
}
#[test]
fn test_parse_literal_with_newline() {
let p = build_pattern("ab\\n").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("ab\n".into()));
}
#[test]
fn test_parse_literal_with_null() {
let p = build_pattern("ab\\0").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("ab\0".into()));
}
#[test]
fn test_parse_literal_with_tab() {
let p = build_pattern("ab\\t").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("ab\t".into()));
}
#[test]
fn test_parse_literal_with_cr() {
let p = build_pattern("ab\\r").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("ab\r".into()));
}
#[test]
fn test_parse_escaped_chars() {
let p = build_pattern("a\\*b\\?c").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("a*b?c".into()));
}
#[test]
fn test_parse_escaped_braces() {
let p = build_pattern("a\\{b\\}c").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("a{b}c".into()));
}
#[test]
fn test_parse_escaped_parens() {
let p = build_pattern("a\\(b\\)").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("a(b)".into()));
}
// ---- parsing: patterns that compile to a glob ----
#[test]
fn test_parse_glob_star_eq() {
let p = build_pattern("*=").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 2);
assert_eq!(g.segments[0], GlobSegment::Star);
assert_eq!(g.segments[1], GlobSegment::Literal("=".into()));
assert!(g.preserve.is_none());
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_glob_key_star() {
let p = build_pattern("key=*").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 2);
assert_eq!(g.segments[0], GlobSegment::Literal("key=".into()));
assert_eq!(g.segments[1], GlobSegment::Star);
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_glob_field_any() {
let p = build_pattern("field?:").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 3);
assert_eq!(g.segments[0], GlobSegment::Literal("field".into()));
assert_eq!(g.segments[1], GlobSegment::Any);
assert_eq!(g.segments[2], GlobSegment::Literal(":".into()));
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_whitespace() {
let p = build_pattern("\\s=").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 2);
assert_eq!(g.segments[0], GlobSegment::Whitespace);
assert_eq!(g.segments[1], GlobSegment::Literal("=".into()));
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_horizontal_whitespace() {
let p = build_pattern("\\h:\\h").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 3);
assert_eq!(g.segments[0], GlobSegment::HorizontalWhitespace);
assert_eq!(g.segments[1], GlobSegment::Literal(":".into()));
assert_eq!(g.segments[2], GlobSegment::HorizontalWhitespace);
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_non_whitespace() {
let p = build_pattern("\\s\\S=").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 3);
assert_eq!(g.segments[0], GlobSegment::Whitespace);
assert_eq!(g.segments[1], GlobSegment::NonWhitespace);
assert_eq!(g.segments[2], GlobSegment::Literal("=".into()));
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_non_horizontal_whitespace() {
let p = build_pattern("\\h\\H:\\H").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments.len(), 4);
assert_eq!(g.segments[0], GlobSegment::HorizontalWhitespace);
assert_eq!(g.segments[1], GlobSegment::NonHorizontalWhitespace);
assert_eq!(g.segments[2], GlobSegment::Literal(":".into()));
assert_eq!(g.segments[3], GlobSegment::NonHorizontalWhitespace);
}
_ => panic!("expected Glob"),
}
}
// ---- parsing: trailing `(...)` preserve group ----
#[test]
fn test_parse_preserve() {
let p = build_pattern("*(key=)").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments, vec![GlobSegment::Star]);
let preserve = g.preserve.as_ref().unwrap();
assert_eq!(preserve.len(), 1);
assert_eq!(preserve[0], GlobSegment::Literal("key=".into()));
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_parse_preserve_with_whitespace() {
let p = build_pattern("*\\s(next)").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments, vec![GlobSegment::Star, GlobSegment::Whitespace]);
let preserve = g.preserve.as_ref().unwrap();
assert_eq!(preserve.len(), 1);
assert_eq!(preserve[0], GlobSegment::Literal("next".into()));
}
_ => panic!("expected Glob"),
}
}
// ---- error reporting ----
#[test]
fn test_err_multi_star() {
let e = build_pattern("*a*").unwrap_err();
assert!(e.contains("at most one * allowed"), "got: {}", e);
assert!(
e.contains("{*a*}"),
"should show the full pattern, got: {}",
e
);
assert!(e.contains("^"), "should have a pointer, got: {}", e);
}
#[test]
fn test_err_preserve_not_end() {
let e = build_pattern("(key)*=").unwrap_err();
assert!(
e.contains("(...)") || e.contains("preserve") || e.contains("unexpected '('"),
"got: {}",
e
);
}
// The star limit is counted separately for the main part and the preserve group.
#[test]
fn test_parse_star_in_preserve() {
let p = build_pattern("*(c*=)").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert_eq!(g.segments, vec![GlobSegment::Star]);
let preserve = g.preserve.as_ref().unwrap();
assert_eq!(preserve.len(), 3);
assert_eq!(preserve[0], GlobSegment::Literal("c".into()));
assert_eq!(preserve[1], GlobSegment::Star);
assert_eq!(preserve[2], GlobSegment::Literal("=".into()));
}
_ => panic!("expected Glob"),
}
}
#[test]
fn test_err_empty() {
let e = build_pattern("").unwrap_err();
assert!(e.contains("empty"), "got: {}", e);
}
#[test]
fn test_unknown_escape_as_literal() {
let p = build_pattern("ab\\x").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("abx".into()));
let p = build_pattern("field\\:=").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("field:=".into()));
let p = build_pattern("\\z").unwrap();
assert_eq!(p.compiled, SepMatcher::Literal("z".into()));
}
#[test]
fn test_err_visual_pointer_position() {
let e = build_pattern("*a*").unwrap_err();
let lines: Vec<&str> = e.lines().collect();
assert!(lines.len() >= 3, "expected 3 lines, got: {}", e);
assert!(lines[1].contains("{*a*}"), "got line1: {}", lines[1]);
let pointer_line = lines[2];
let caret_pos = pointer_line.find('^').expect("no ^ found");
// NOTE(review): with a one-space indent in `fmt_err`'s format string the caret
// for byte 2 lands at column 4; the expected 5 implies a two-space indent — confirm.
assert_eq!(
caret_pos, 5,
"caret at wrong position in: {:?}",
pointer_line
);
}
// Smoke test: only prints the rendered messages; it fails solely if a case
// unexpectedly parses (`unwrap_err`).
#[test]
fn test_err_messages_display() {
let cases = vec![
("", "empty pattern"),
("*a*", "multiple stars"),
("(key)*=", "preserve not at end"),
("test(mid)abc", "paren not at end"),
];
for (input, label) in cases {
let err = build_pattern(input).unwrap_err();
println!("--- {} ---\n{}\n", label, err);
}
}
// ---- matching via `find` ----
#[test]
fn test_match_literal() {
let p = build_pattern("abc").unwrap();
let (off, m) = p.find("xyzabcdef").unwrap();
assert_eq!(off, 3);
assert_eq!(m.consumed, 3);
assert_eq!(m.matched, 3);
}
#[test]
fn test_match_literal_no_match() {
let p = build_pattern("abc").unwrap();
assert!(p.find("xyzdef").is_none());
}
#[test]
fn test_match_star_eq_non_greedy() {
let p = build_pattern("*=").unwrap();
let (off, m) = p.find("a=b=c").unwrap();
assert_eq!(off, 1);
assert_eq!(m.consumed, 1);
assert_eq!(m.matched, 1);
}
#[test]
fn test_match_whitespace_eq() {
let p = build_pattern("\\s=").unwrap();
// NOTE(review): a single space before `=` yields consumed == 2; the expected 3
// implies two spaces in the haystack — confirm the literal.
let (off, m) = p.find("key =val").unwrap();
assert_eq!(off, 3);
assert_eq!(m.consumed, 3);
assert_eq!(m.matched, 3);
}
#[test]
fn test_match_preserve() {
let p = build_pattern("*\\s(key=)").unwrap();
// NOTE(review): a single space before `key=` yields consumed == 1 / matched == 5;
// the expected 2 / 6 imply two spaces in the haystack — confirm the literal.
let (off, m) = p.find("hello key=value").unwrap();
assert_eq!(off, 5);
assert_eq!(m.consumed, 2);
assert_eq!(m.matched, 6); }
#[test]
fn test_match_field_any() {
let p = build_pattern("field?:").unwrap();
let (off, m) = p.find("fieldA:value").unwrap();
assert_eq!(off, 0);
assert_eq!(m.consumed, 7);
assert_eq!(m.matched, 7);
}
#[test]
fn test_match_horizontal_whitespace() {
let p = build_pattern("\\h:\\h").unwrap();
let (off, m) = p.find("key\t:\tval").unwrap();
assert_eq!(off, 3);
assert_eq!(m.consumed, 3);
assert_eq!(m.matched, 3);
}
// `\S` first takes the whole run "externalId=0" and then backs off until `=` follows.
#[test]
fn test_match_non_whitespace() {
let p = build_pattern("\\s\\S=").unwrap();
let (off, m) = p.find("msg=Test message externalId=0").unwrap();
assert_eq!(off, 16); assert_eq!(m.consumed, 12); assert_eq!(m.matched, 12);
}
#[test]
fn test_match_non_whitespace_preserve_kvarr() {
let p = build_pattern("\\s(\\S=)").unwrap();
let (off, m) = p.find("msg=Test message externalId=0").unwrap();
assert_eq!(off, 16); assert_eq!(m.consumed, 1); assert_eq!(m.matched, 12); }
#[test]
fn test_match_non_horizontal_whitespace() {
let p = build_pattern("\\H=").unwrap();
let (off, m) = p.find("key\t:\tval\texternalId=0").unwrap();
assert_eq!(off, 10);
assert_eq!(m.consumed, 11); }
#[test]
fn test_match_no_match() {
let p = build_pattern("\\s=").unwrap();
assert!(p.find("key=val").is_none());
}
// ---- matching via `match_at_start` ----
#[test]
fn test_match_at_start_literal() {
let p = build_pattern("abc").unwrap();
let m = p.match_at_start("abcdef").unwrap();
assert_eq!(m.consumed, 3);
assert!(p.match_at_start("xabc").is_none());
}
#[test]
fn test_match_at_start_glob() {
let p = build_pattern("\\s=").unwrap();
// NOTE(review): a single leading space yields consumed == 2; the expected 3
// implies two leading spaces — confirm the literal.
let m = p.match_at_start(" =val").unwrap();
assert_eq!(m.consumed, 3);
assert!(p.match_at_start("val =").is_none());
}
// A trailing `*` matches the empty string first, so only "key=" is consumed.
#[test]
fn test_match_star_at_end() {
let p = build_pattern("key=*").unwrap();
let (off, m) = p.find("key=value").unwrap();
assert_eq!(off, 0);
assert_eq!(m.consumed, 4); assert_eq!(m.matched, 4);
}
#[test]
fn test_match_star_newline() {
let p = build_pattern("\\s=*\\n").unwrap();
// NOTE(review): the haystack as written is 8 bytes long, so consumed would be 8;
// the expected 9 implies two leading spaces — confirm the literal.
let (off, m) = p.find(" =hello\n").unwrap();
assert_eq!(off, 0);
assert_eq!(m.consumed, 9);
}
// ---- preserve-only patterns: nothing consumed, preserve length reported as `matched` ----
#[test]
fn test_match_preserve_only() {
let p = build_pattern("(abc)").unwrap();
match &p.compiled {
SepMatcher::Glob(g) => {
assert!(g.segments.is_empty());
assert!(g.preserve.is_some());
}
_ => panic!("expected Glob"),
}
let (off, m) = p.find("abcdef").unwrap();
assert_eq!(off, 0);
assert_eq!(m.consumed, 0);
assert_eq!(m.matched, 3);
let (off, m) = p.find("xyzabcdef").unwrap();
assert_eq!(off, 3);
assert_eq!(m.consumed, 0);
assert_eq!(m.matched, 3);
assert!(p.find("xyzdef").is_none());
}
#[test]
fn test_match_preserve_only_command() {
let p = build_pattern("(command=)").unwrap();
let (off, m) = p.find("hello command=value").unwrap();
assert_eq!(off, 6); assert_eq!(m.consumed, 0); assert_eq!(m.matched, 8);
let (off, m) = p.find("command=value").unwrap();
assert_eq!(off, 0);
assert_eq!(m.consumed, 0);
assert_eq!(m.matched, 8);
}
#[test]
fn test_match_preserve_with_star() {
let p = build_pattern("(c*=)").unwrap();
let (off, m) = p.find("hello cmd=value").unwrap();
assert_eq!(off, 6); assert_eq!(m.consumed, 0);
assert_eq!(m.matched, 4);
let (off, m) = p.find("hello cat=1 cmd=2").unwrap();
assert_eq!(off, 6); assert_eq!(m.consumed, 0);
assert_eq!(m.matched, 4); }
// ---- serde ----
#[test]
fn test_serde_roundtrip() {
let p = build_pattern("*\\s(key=)").unwrap();
let json = serde_json::to_string(&p).unwrap();
assert_eq!(json, r#""*\\s(key=)""#);
let p2: SepPattern = serde_json::from_str(&json).unwrap();
assert_eq!(p.raw, p2.raw);
assert_eq!(p.compiled, p2.compiled);
}
#[test]
fn test_serde_roundtrip_literal() {
let p = build_pattern("abc").unwrap();
let json = serde_json::to_string(&p).unwrap();
let p2: SepPattern = serde_json::from_str(&json).unwrap();
assert_eq!(p, p2);
}
}