Skip to main content

wpl/parser/
utils.rs

1use smol_str::SmolStr;
2use winnow::ascii::{multispace0, take_escaped};
3use winnow::combinator::{alt, delimited, fail, opt, peek, preceded, separated_pair};
4use winnow::error::{ContextError, ErrMode};
5use winnow::stream::Stream;
6use winnow::token::{any, literal, none_of, one_of, take, take_until, take_while};
7use wp_model_core::model::Value;
8use wp_primitives::Parser;
9use wp_primitives::WResult;
10
11use wp_primitives::symbol::ctx_desc;
12
13//#[allow(clippy::nonminimal_bool)]
14pub fn take_ref_path<'a>(input: &mut &'a str) -> WResult<&'a str> {
15    let s = *input;
16    let mut pos = 0usize;
17    let mut paren_depth = 0usize;
18
19    while pos < s.len() {
20        let rest = &s[pos..];
21        let ch = rest.chars().next().expect("pos bounds checked");
22        let ch_len = ch.len_utf8();
23        match ch {
24            ')' if paren_depth == 0 => break,
25            '(' => {
26                paren_depth += 1;
27                pos += ch_len;
28            }
29            ')' => {
30                paren_depth = paren_depth.saturating_sub(1);
31                pos += ch_len;
32            }
33            '<' => {
34                if let Some(seg_len) = parse_wrapped_key_segment(rest, '<', '>') {
35                    pos += seg_len;
36                } else {
37                    break;
38                }
39            }
40            '{' => {
41                if let Some(seg_len) = parse_wrapped_key_segment(rest, '{', '}') {
42                    pos += seg_len;
43                } else {
44                    break;
45                }
46            }
47            '>' | '}' => break,
48            _ => {
49                let allowed =
50                    ch.is_alphanumeric() || matches!(ch, '_' | '/' | '-' | '.' | '[' | ']' | '*');
51                if !allowed {
52                    break;
53                }
54                pos += ch_len;
55            }
56        }
57    }
58
59    if pos == 0 {
60        return fail.context(ctx_desc("<ref_path>")).parse_next(input);
61    }
62
63    let (head, tail) = s.split_at(pos);
64    *input = tail;
65    Ok(head)
66}
67
68fn parse_wrapped_key_segment(s: &str, open: char, close: char) -> Option<usize> {
69    let mut iter = s.char_indices();
70    let (_, first) = iter.next()?;
71    if first != open {
72        return None;
73    }
74    let mut has_inner = false;
75    for (idx, ch) in iter {
76        if ch == close {
77            return has_inner.then_some(idx + ch.len_utf8());
78        }
79        if !is_wrapped_key_char(ch) {
80            return None;
81        }
82        has_inner = true;
83    }
84    None
85}
86
/// Returns `true` for characters allowed inside a wrapped key segment:
/// any alphanumeric character, or one of `_`, `/`, `-`, `.`.
#[inline]
fn is_wrapped_key_char(ch: char) -> bool {
    match ch {
        '_' | '/' | '-' | '.' => true,
        other => other.is_alphanumeric(),
    }
}
91
92/// Parse field reference path: supports either bare identifiers or single-quoted strings
93/// Examples: `@field_name`, `@'@special-field'`
94/// Single quotes are raw strings - only \' is escaped
95pub fn take_ref_path_or_quoted(input: &mut &str) -> WResult<String> {
96    alt((
97        single_quot_raw_str,
98        take_ref_path.map(|s: &str| s.to_string()),
99    ))
100    .parse_next(input)
101}
102pub fn take_exact_path<'a>(input: &mut &'a str) -> WResult<&'a str> {
103    take_while(1.., |c: char| {
104        c.is_alphanumeric() || c == '_' || c == '/' || c == '-' || c == '.'
105    })
106    .parse_next(input)
107}
108
109pub fn take_key<'a>(input: &mut &'a str) -> WResult<&'a str> {
110    take_while(1.., |c: char| {
111        c.is_alphanumeric() || c == '_' || c == '/' || c == '-' || c == '.'
112    })
113    .parse_next(input)
114}
115
116pub fn take_kv_key<'a>(input: &mut &'a str) -> WResult<&'a str> {
117    take_while(1.., |c: char| {
118        c.is_alphanumeric()
119            || matches!(
120                c,
121                '_' | '/' | '-' | '.' | '(' | ')' | '<' | '>' | '[' | ']' | '{' | '}'
122            )
123    })
124    .parse_next(input)
125}
126
127pub fn take_var_name<'a>(input: &mut &'a str) -> WResult<&'a str> {
128    take_while(1.., |c: char| {
129        c.is_alphanumeric() || c == '_' || c == '.' || c == '-'
130    })
131    .parse_next(input)
132}
133
134pub fn take_fun_name<'a>(input: &mut &'a str) -> WResult<&'a str> {
135    //trace("var_name", move |input: &mut &'a str| {
136    take_while(1.., |c: char| c.is_alphanumeric() || c == '_' || c == '.').parse_next(input)
137    //})
138    //.parse_next(input)
139}
140
141pub fn take_meta_name<'a>(input: &mut &'a str) -> WResult<&'a str> {
142    //trace("keyword", move |input: &mut &'a str| {
143    take_while(1.., |c: char| c.is_alphanumeric() || c == '_' || c == '/').parse_next(input)
144    //})
145    //.parse_next(input)
146}
147
148pub fn take_sql_tval(input: &mut &str) -> WResult<Value> {
149    let chars = opt(alt((
150        delimited('"', take_until(0.., "\""), '"'),
151        delimited('\'', take_until(0.., "'"), '\''),
152    )))
153    .parse_next(input)?;
154    if let Some(chars) = chars {
155        return Ok(Value::Chars(chars.into()));
156    }
157    if let Some(value) = opt(take_while(0.., ('0'..='9', '.', '-', '+'))).parse_next(input)? {
158        if let Ok(digit) = value.parse::<i64>() {
159            return Ok(Value::Digit(digit));
160        } else {
161            return Ok(Value::Float(value.parse::<f64>().unwrap_or(0.0)));
162        }
163    }
164
165    //fail get value;
166    "fail-value".parse_next(input)?;
167    Ok(Value::Chars("fail-value".into()))
168}
169
170#[inline]
171pub fn quot_str<'a>(input: &mut &'a str) -> WResult<&'a str> {
172    alt((
173        duble_quot_str_impl.context(ctx_desc(
174            "<quoted_string>::= '\"' , <character_sequence> , '\"' ",
175        )),
176        single_quot_str_impl.context(ctx_desc(
177            "<quoted_string>::= '\"' , <character_sequence> , '\"' ",
178        )),
179    ))
180    .parse_next(input)
181}
182#[inline]
183pub fn interval_data<'a>(input: &mut &'a str) -> WResult<&'a str> {
184    interval_impl
185        .context(ctx_desc("extract bracketed segments: (), [], {}, <>"))
186        .parse_next(input)
187}
188
189// 不要匹配 ‘\’ 和 ‘“’
190// 引号字符串:允许任意非引号/反斜杠字符,转义支持 \" \\ \n \t \r \xHH
191#[inline]
192pub fn duble_quot_str_impl<'a>(input: &mut &'a str) -> WResult<&'a str> {
193    literal('"')
194        .context(ctx_desc("<beg>\""))
195        .parse_next(input)?;
196    let content = take_escaped(none_of(['\\', '"']), '\\', any).parse_next(input)?;
197    literal('"')
198        .context(ctx_desc("<end>\""))
199        .parse_next(input)?;
200    Ok(content)
201}
202#[inline]
203pub fn single_quot_str_impl<'a>(input: &mut &'a str) -> WResult<&'a str> {
204    literal('\'')
205        .context(ctx_desc("<beg>'"))
206        .parse_next(input)?;
207    let content = take_escaped(none_of(['\\', '\'']), '\\', any).parse_next(input)?;
208    literal('\'')
209        .context(ctx_desc("<end>'"))
210        .parse_next(input)?;
211    Ok(content)
212}
213
214/// Parse single-quoted raw string: only \' is escaped, others are literal
215/// Used for field references where single quotes represent raw strings
216#[inline]
217pub fn single_quot_raw_str(input: &mut &str) -> WResult<String> {
218    literal('\'')
219        .context(ctx_desc("<beg>'"))
220        .parse_next(input)?;
221
222    let mut result = String::new();
223    let mut chars = input.chars();
224
225    loop {
226        match chars.next() {
227            None => {
228                return fail
229                    .context(ctx_desc("unclosed single quote"))
230                    .parse_next(input);
231            }
232            Some('\\') => {
233                // Only handle \' escape, others are literal
234                match chars.as_str().chars().next() {
235                    Some('\'') => {
236                        result.push('\'');
237                        chars.next(); // consume '
238                    }
239                    _ => {
240                        // Other cases, \ is literal
241                        result.push('\\');
242                    }
243                }
244            }
245            Some('\'') => {
246                // Closing quote
247                let consumed = input.len() - chars.as_str().len();
248                *input = &input[consumed..];
249                return Ok(result);
250            }
251            Some(ch) => result.push(ch),
252        }
253    }
254}
255
256#[inline]
257pub fn interval_impl<'a>(input: &mut &'a str) -> WResult<&'a str> {
258    let s = *input;
259    let mut chars = s.char_indices();
260    let Some((_, first)) = chars.next() else {
261        return fail
262            .context(ctx_desc("interval requires leading bracket"))
263            .parse_next(input);
264    };
265
266    let Some(first_close) = closing_for_bracket(first) else {
267        return fail
268            .context(ctx_desc("interval must start with [ ( { <"))
269            .parse_next(input);
270    };
271    let mut stack: Vec<char> = vec![first_close];
272    let mut iter = chars.peekable();
273
274    while let Some((idx, ch)) = iter.next() {
275        if ch == '\\' {
276            // skip escaped character outside of quoted sections
277            let _ = iter.next();
278            continue;
279        }
280        match ch {
281            '[' | '(' | '{' | '<' => {
282                if let Some(close) = closing_for_bracket(ch) {
283                    stack.push(close);
284                }
285            }
286            ']' | ')' | '}' | '>' => {
287                let expected = stack.pop().unwrap();
288                if ch != expected {
289                    return fail
290                        .context(ctx_desc("interval bracket mismatch"))
291                        .parse_next(input);
292                }
293                if stack.is_empty() {
294                    let end = idx + ch.len_utf8();
295                    let (matched, rest) = s.split_at(end);
296                    *input = rest;
297                    return Ok(matched);
298                }
299            }
300            '"' | '\'' => {
301                let quote = ch;
302                let mut escaped = false;
303                for (_, qc) in iter.by_ref() {
304                    if escaped {
305                        escaped = false;
306                        continue;
307                    }
308                    if qc == '\\' {
309                        escaped = true;
310                        continue;
311                    }
312                    if qc == quote {
313                        break;
314                    }
315                }
316            }
317            _ => {}
318        }
319    }
320
321    fail.context(ctx_desc("interval missing closing bracket"))
322        .parse_next(input)
323}
324
/// Maps an opening bracket (`[`, `(`, `{`, `<`) to its closing counterpart.
/// Returns `None` for any other character.
fn closing_for_bracket(ch: char) -> Option<char> {
    const PAIRS: [(char, char); 4] = [('[', ']'), ('(', ')'), ('{', '}'), ('<', '>')];
    PAIRS
        .iter()
        .find(|(open, _)| *open == ch)
        .map(|&(_, close)| close)
}
334
335pub fn window_path<'a>(input: &mut &'a str) -> WResult<&'a str> {
336    literal('"').parse_next(input)?;
337    let content = take_until(0.., "\"").parse_next(input)?;
338    literal('"').parse_next(input)?;
339    Ok(content)
340}
341
342/// 原始字符串(首选):r#"..."#,内容不做转义处理;
343/// 兼容旧写法:r"..."(仅为向后兼容,未来可能移除)。
344pub fn quot_r_str<'a>(input: &mut &'a str) -> WResult<&'a str> {
345    let s = *input;
346    // 优先解析 r#"..."#
347    if let Some(rest) = s.strip_prefix("r#\"") {
348        if let Some(pos) = rest.find("\"#") {
349            let content = &rest[..pos];
350            let new_rest = &rest[pos + 2..];
351            *input = new_rest;
352            return Ok(content);
353        } else {
354            return fail
355                .context(ctx_desc("raw string not closed: r#\"...\"#"))
356                .parse_next(input);
357        }
358    }
359    // 回退兼容 r"..."
360    if let Some(rest) = s.strip_prefix("r\"") {
361        if let Some(pos) = rest.find('"') {
362            let content = &rest[..pos];
363            let new_rest = &rest[pos + 1..];
364            *input = new_rest;
365            return Ok(content);
366        } else {
367            return fail
368                .context(ctx_desc("raw string not closed: r\"...\""))
369                .parse_next(input);
370        }
371    }
372    // 不匹配
373    fail.parse_next(input)
374}
375
376pub fn quot_raw<'a>(input: &mut &'a str) -> WResult<&'a str> {
377    let cp = input.checkpoint();
378    literal('"').parse_next(input)?;
379    let content = take_escaped(none_of(['\\', '"']), '\\', any).parse_next(input)?;
380    literal('"').parse_next(input)?;
381    let len = content.len() + 2;
382    input.reset(&cp);
383    let raw = take(len).parse_next(input)?;
384    Ok(raw)
385}
386
387pub fn take_parentheses<'a>(input: &mut &'a str) -> WResult<&'a str> {
388    literal('(').parse_next(input)?;
389    let content = take_escaped(none_of(['\\', ')']), '\\', one_of([')'])).parse_next(input)?;
390    literal(')').parse_next(input)?;
391    Ok(content)
392}
393
394// #[tag(tag : "hello", raw_copy : "raw" ), copy_raw(name:"hello")]
/// Decodes backslash escapes in `s` and returns the resulting string.
///
/// Recognised escapes: `\"`, `\'`, `\\`, `\n`, `\t`, `\r` and `\xHH` (two hex
/// digits producing one raw byte). Consecutive `\xHH` bytes may spell out a
/// multi-byte UTF-8 character. Anything else is kept verbatim:
///
/// * an unknown escape such as `\q` stays `\q`;
/// * a malformed or truncated `\x` sequence is emitted as written;
/// * a lone backslash at the very end of the input is kept (it used to be
///   dropped silently).
///
/// Bytes that do not form valid UTF-8 after decoding are replaced with
/// U+FFFD, per `String::from_utf8_lossy`.
pub fn decode_escapes(s: &str) -> String {
    /// Appends the UTF-8 encoding of `ch` without allocating.
    fn push_char(out: &mut Vec<u8>, ch: char) {
        let mut buf = [0u8; 4];
        out.extend_from_slice(ch.encode_utf8(&mut buf).as_bytes());
    }

    // Work on bytes because `\xHH` escapes produce raw bytes, not characters.
    let mut out: Vec<u8> = Vec::with_capacity(s.len());
    let mut it = s.chars();

    while let Some(c) = it.next() {
        if c != '\\' {
            push_char(&mut out, c);
            continue;
        }
        match it.next() {
            Some('"') => out.push(b'"'),
            Some('\'') => out.push(b'\''),
            Some('\\') => out.push(b'\\'),
            Some('n') => out.push(b'\n'),
            Some('t') => out.push(b'\t'),
            Some('r') => out.push(b'\r'),
            Some('x') => {
                // Always consume up to two characters after `\x`.
                let h1 = it.next();
                let h2 = it.next();
                let byte = h1.zip(h2).and_then(|(hi, lo)| {
                    let (hi, lo) = (hi.to_digit(16)?, lo.to_digit(16)?);
                    u8::try_from((hi << 4) | lo).ok()
                });
                match byte {
                    Some(b) => out.push(b),
                    None => {
                        // Not two hex digits: keep the sequence as written.
                        out.extend_from_slice(b"\\x");
                        for ch in h1.into_iter().chain(h2) {
                            push_char(&mut out, ch);
                        }
                    }
                }
            }
            Some(other) => {
                // Unknown escape: keep both the backslash and the character.
                out.push(b'\\');
                push_char(&mut out, other);
            }
            // Trailing backslash with nothing to escape: keep it.
            None => out.push(b'\\'),
        }
    }

    String::from_utf8_lossy(&out).into_owned()
}
466
467pub fn take_tag_kv(input: &mut &str) -> WResult<(SmolStr, SmolStr)> {
468    // 值支持普通引号字符串与原始字符串;普通字符串会做一次反转义,原始字符串保持原样
469    separated_pair(
470        preceded(multispace0, take_key),
471        (multispace0, ':', multispace0),
472        alt((
473            quot_r_str.map(|s: &str| SmolStr::from(s)),
474            quot_str.map(|s: &str| SmolStr::from(decode_escapes(s))),
475        )),
476    )
477    .map(|(k, v)| (SmolStr::from(k), v))
478    .parse_next(input)
479}
480
481#[inline]
482pub fn take_to_end<'a>(input: &mut &'a str) -> WResult<&'a str> {
483    //trace("take_to_end", move |input: &mut &'a str| {
484    take_while(0.., |_| true).parse_next(input)
485    //})
486    //.parse_next(input)
487}
488
489pub fn peek_str(what: &str, input: &mut &str) -> WResult<()> {
490    // In winnow 0.7, `peek` over a string may produce `Result<_, ContextError>`.
491    // Convert it into `ModalResult<()>` by wrapping the error in `ErrMode`.
492    match peek(what).parse_next(input) {
493        Ok(_) => Ok(()),
494        Err(e) => Err(ErrMode::Backtrack(e)),
495    }
496}
497
498pub fn peek_next<'a, O, ParseNext>(parser: ParseNext, input: &mut &'a str) -> WResult<()>
499where
500    ParseNext: Parser<&'a str, O, ContextError>,
501{
502    match peek(parser).parse_next(input) {
503        Ok(_) => Ok(()),
504        Err(e) => Err(ErrMode::Backtrack(e)),
505    }
506}
507pub fn is_sep_next(input: &mut &str) -> bool {
508    let _ = multispace0::<&str, ErrMode<ContextError>>.parse_next(input);
509    if peek_str(",", input).is_ok() {
510        let _: Result<&str, ErrMode<ContextError>> = literal(",").parse_next(input);
511        return true;
512    }
513    false
514}
515pub fn is_next_unit(prefix: &str, input: &mut &str) -> bool {
516    let _ = multispace0::<&str, ErrMode<ContextError>>.parse_next(input);
517    if peek_str(prefix, input).is_ok() {
518        return true;
519    }
520    false
521}
522
523pub fn is_next<'a, O, ParseNext>(parser: ParseNext, input: &mut &'a str) -> bool
524where
525    ParseNext: Parser<&'a str, O, ContextError>,
526{
527    let _ = multispace0::<&str, ErrMode<ContextError>>.parse_next(input);
528    if peek_next(parser, input).is_ok() {
529        return true;
530    }
531    false
532}
533
// Unit tests for the parser helpers in this module, plus two formatting
// round-trips that go through the segment and package parsers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::error_detail;
    use crate::parser::utils::{quot_str, take_key, take_kv_key, take_parentheses, take_to_end};
    use crate::parser::wpl_pkg::wpl_package;
    use orion_error::TestAssert;
    use winnow::LocatingSlice;
    use wp_primitives::WResult as ModalResult;

    // Quoted text becomes Chars, integers become Digit, decimals become Float.
    #[test]
    fn test_take_val() -> ModalResult<()> {
        assert_eq!(
            Value::Chars("key".into()),
            take_sql_tval.parse_next(&mut "'key'")?
        );
        assert_eq!(Value::Digit(100), take_sql_tval.parse_next(&mut "100")?);
        assert_eq!(
            Value::Float(100.01),
            take_sql_tval.parse_next(&mut "100.01")?
        );
        assert_eq!(
            Value::Float(-100.01),
            take_sql_tval.parse_next(&mut "-100.01")?
        );
        Ok(())
    }

    // `take_key` stops at the first character outside its set (here `!`).
    #[test]
    fn test_key_ident() {
        assert_eq!(Ok(("", "key")), take_key.parse_peek("key"));
        assert_eq!(Ok(("!", "key")), take_key.parse_peek("key!"));
        assert_eq!(
            Ok(("!", "http/request")),
            take_key.parse_peek("http/request!")
        );
        assert_eq!(
            Ok(("!", "123http/request")),
            take_key.parse_peek("123http/request!")
        );
    }
    #[test]
    fn test_kv_key_ident() {
        // basic key chars (same as take_key)
        assert_eq!(Ok(("", "key")), take_kv_key.parse_peek("key"));
        assert_eq!(Ok(("!", "key")), take_kv_key.parse_peek("key!"));
        assert_eq!(
            Ok(("!", "http/request")),
            take_kv_key.parse_peek("http/request!")
        );
        // parentheses
        assert_eq!(Ok(("=v", "fn(arg)")), take_kv_key.parse_peek("fn(arg)=v"));
        // angle brackets
        assert_eq!(
            Ok(("=1", "list<int>")),
            take_kv_key.parse_peek("list<int>=1")
        );
        // square brackets
        assert_eq!(Ok((":x", "arr[0]")), take_kv_key.parse_peek("arr[0]:x"));
        // curly braces
        assert_eq!(Ok(("=ok", "set{a}")), take_kv_key.parse_peek("set{a}=ok"));
        // mixed brackets
        assert_eq!(
            Ok(("=v", "a(b)[c]<d>{e}")),
            take_kv_key.parse_peek("a(b)[c]<d>{e}=v")
        );
        // stops at '=' and ':'
        assert_eq!(Ok(("=val", "key(x)")), take_kv_key.parse_peek("key(x)=val"));
        assert_eq!(Ok((":val", "key(x)")), take_kv_key.parse_peek("key(x):val"));
    }
    // Quoted strings are returned without quotes and with escapes left undecoded.
    #[test]
    fn test_quot_str() {
        assert_eq!(quot_str.parse_peek("\"123\""), Ok(("", "123")));
        assert_eq!(quot_str.parse_peek(r#""\a123""#), Ok(("", r#"\a123"#)));
        assert_eq!(quot_str.parse_peek("'123'"), Ok(("", "123")));
        assert_eq!(quot_str.parse_peek("\"1-?#ab\""), Ok(("", "1-?#ab")));
        assert_eq!(quot_str.parse_peek(r#""12\"3""#), Ok(("", r#"12\"3"#)));
        assert_eq!(quot_str.parse_peek(r#"'12\"3'"#), Ok(("", r#"12\"3"#)));
        // Unicode content is supported
        assert_eq!(quot_str.parse_peek("\"中文🙂\""), Ok(("", "中文🙂")));
        //assert_eq!(quot_str.parse_peek(r#""sddD:\招标项目\6-MSS\mss日志映射表""#),
        // `window_path` does no escape handling, so backslashes pass through.
        assert_eq!(
            window_path.parse_peek(r#""sddD:\招标项目\6-MSS\mss日志映射表""#),
            Ok(("", r#"sddD:\招标项目\6-MSS\mss日志映射表"#))
        );
    }
    #[test]
    fn test_quot_r_str() {
        use crate::parser::utils::quot_r_str;
        // r#"..."# may contain embedded quotes
        assert_eq!(
            quot_r_str.parse_peek("r#\"a\\b \"c\"\"#"),
            Ok(("", "a\\b \"c\""))
        );
        assert_eq!(quot_r_str.parse_peek("r#\"end\"#"), Ok(("", "end")));
        // the legacy form r"..." is still accepted
        assert_eq!(quot_r_str.parse_peek("r\"raw\""), Ok(("", "raw")));
    }
    // `\)` inside the parentheses does not end the content and is kept as written.
    #[test]
    fn test_take_pat() {
        assert_eq!(take_parentheses.parse_peek("(123)"), Ok(("", "123")));
        assert_eq!(
            take_parentheses.parse_peek(r#"(12\)3)"#),
            Ok(("", r#"12\)3"#))
        );
    }

    // `take_to_end` accepts empty input and arbitrary Unicode / whitespace.
    #[test]
    fn test_take_to_end() {
        let input = "";
        let x = take_to_end.parse(input).assert();
        assert_eq!(x, "");

        let input = "hello 你好 😂 😁 π \u{3001} \n \t en";
        let x = take_to_end.parse(input).assert();
        assert_eq!(x, input);
    }

    // Parses a one-line segment and checks its pretty-printed form.
    #[test]
    fn test_prefix() {
        let data = "{ (digit, time,sn,chars,time,kv,ip,kv,chars,kv,kv,chars,kv,kv,chars,chars,ip,chars,http/request,http/agent)}";
        // Print the detailed parse error first so a failure below is diagnosable.
        if let Err(err) = crate::parser::parse_code::segment.parse(data) {
            println!("{}", error_detail(err));
        }
        assert_eq!(
            crate::parser::parse_code::segment
                .parse(data)
                .assert()
                .to_string(),
            r#"  (
    digit,
    time,
    sn,
    chars,
    time,
    kv,
    ip,
    kv,
    chars,
    kv,
    kv,
    chars,
    kv,
    kv,
    chars,
    chars,
    ip,
    chars,
    http/request,
    http/agent
  )"#
        );
    }
    // Parses a loosely formatted package and checks the normalized output.
    #[test]
    fn test_meta() {
        let input = r#"    package test {
                rule test { (
                time,
                time_timestamp
                ) }
        }
    "#;

        assert_eq!(
            wpl_package
                .parse(&LocatingSlice::new(input))
                .assert()
                .to_string(),
            r#"package test {
  rule test {
    (
      time,
      time_timestamp
    )
  }
}
"#
        );
    }

    // `\xHH` escapes in a tag value are decoded byte-wise into UTF-8 text.
    #[test]
    fn test_tag_kv_hex_escape() {
        use super::take_tag_kv;
        let mut s = "key:\"\\xE4\\xB8\\xAD\\xE6\\x96\\x87\"";
        let (k, v) = take_tag_kv.parse_next(&mut s).assert();
        assert_eq!(k, "key");
        assert_eq!(v, "中文");
    }

    #[test]
    fn test_interval_simple() {
        let mut input = "{payload}";
        let parsed = interval_data(&mut input).assert();
        assert_eq!(parsed, "{payload}");
        assert_eq!(input, "");
    }

    // Brackets inside quoted sections must not affect nesting.
    #[test]
    fn test_interval_nested_with_quotes() {
        let mut input = "<({\"[(foo)]\"}, ['x'])>tail";
        let parsed = interval_data(&mut input).assert();
        assert_eq!(parsed, "<({\"[(foo)]\"}, ['x'])>");
        assert_eq!(input, "tail");
    }

    #[test]
    fn test_interval_missing_closer() {
        let mut input = "[1,2";
        assert!(interval_data(&mut input).is_err());
    }

    #[test]
    fn test_take_ref_path_or_quoted() {
        // Test bare identifier
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("field_name"),
            Ok(("", "field_name".to_string()))
        );

        // Test single-quoted with @ prefix
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("'@abc'"),
            Ok(("", "@abc".to_string()))
        );

        // Test single-quoted with spaces
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("'field with spaces'"),
            Ok(("", "field with spaces".to_string()))
        );

        // Test single-quoted with special characters
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("'@special-field#123'"),
            Ok(("", "@special-field#123".to_string()))
        );

        // Test escaped quote inside single-quoted string
        let input = "'field\\'s name'";
        assert_eq!(
            take_ref_path_or_quoted.parse_peek(input),
            Ok(("", "field's name".to_string()))
        );

        // Test path-like identifier
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("process/path[0]"),
            Ok(("", "process/path[0]".to_string()))
        );
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("list<int>"),
            Ok(("", "list<int>".to_string()))
        );
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("set{a}"),
            Ok(("", "set{a}".to_string()))
        );
        // A wrapped group with non-key characters is left for the caller.
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("curr<[,]>"),
            Ok(("<[,]>", "curr".to_string()))
        );
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("curr{\\s(\\S=)}"),
            Ok(("{\\s(\\S=)}", "curr".to_string()))
        );

        // Balanced parentheses are part of the path; an unmatched `)` is not.
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("protocal(80)"),
            Ok(("", "protocal(80)".to_string()))
        );
        assert_eq!(
            take_ref_path_or_quoted.parse_peek("protocal(80))"),
            Ok((")", "protocal(80)".to_string()))
        );

        // Test single quotes are raw strings - \n, \t are literal
        assert_eq!(
            take_ref_path_or_quoted.parse_peek(r"'raw\nstring'"),
            Ok(("", r"raw\nstring".to_string()))
        );

        assert_eq!(
            take_ref_path_or_quoted.parse_peek(r"'path\to\file'"),
            Ok(("", r"path\to\file".to_string()))
        );

        // Only \' is escaped in single quotes
        assert_eq!(
            take_ref_path_or_quoted.parse_peek(r"'it\'s here'"),
            Ok(("", "it's here".to_string()))
        );
    }
}