Skip to main content

wpl/ast/syntax/
wpl_sep.rs

1use crate::ast::syntax::sep_pattern::SepPattern;
2use crate::ast::{GenFmt, WplFmt};
3use crate::parser::utils::{quot_r_str, quot_str, take_to_end};
4use derive_getters::Getters;
5use smol_str::SmolStr;
6use std::fmt::{Display, Formatter};
7use std::marker::PhantomData;
8use winnow::combinator::{alt, opt, separated};
9use winnow::stream::Range;
10use winnow::token::{literal, take_until, take_while};
11use wp_primitives::Parser;
12use wp_primitives::WResult;
13use wp_primitives::symbol::ctx_desc;
14
/// Separator used when nothing more specific has been configured: a single space.
const DEFAULT_SEP: &str = " ";

/// Supplies the fallback separator string for a separator marker type.
pub trait DefaultSep {
    /// Returns the separator to use when no explicit separator value is set.
    fn sep_str() -> &'static str;
}

/// The unit marker falls back to [`DEFAULT_SEP`].
impl DefaultSep for () {
    fn sep_str() -> &'static str {
        DEFAULT_SEP
    }
}
24
/// Separator configuration, tagged with a marker type `T`.
///
/// `T` is only stored as `PhantomData`; methods bounded by `T: DefaultSep`
/// fall back to `T::sep_str()` when `cur_val` is `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Getters)]
pub struct WplSepT<T> {
    /// Priority of this separator; `apply_default` / `override_with` let a
    /// strictly higher priority replace the current value.
    prio: usize,
    /// The separator currently in effect, or `None` when unset.
    cur_val: Option<SepEnum>,
    /// Optional secondary terminator consulted by `read_until_sep`.
    ups_val: Option<SmolStr>,
    /// Set by the `infer_*` constructors; `WplFmt` prints nothing when true.
    infer: bool,
    /// When false, `consume_sep` / `try_consume_sep` leave the input untouched.
    is_take: bool,
    /// Zero-sized marker for `T`; skipped during (de)serialization.
    #[serde(skip)]
    _phant: PhantomData<T>,
}
/// Separator whose fallback is provided by the unit marker `()`.
pub type WplSep = WplSepT<()>;
36
37impl<T> WplSepT<T> {
38    pub fn from(value: &WplSep) -> Self {
39        WplSepT {
40            prio: value.prio,
41            cur_val: value.cur_val.clone(),
42            ups_val: value.ups_val.clone(),
43            infer: value.infer,
44            is_take: value.is_take,
45            _phant: PhantomData,
46        }
47    }
48}
49
/// The kinds of separator a `WplSepT` can hold.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SepEnum {
    /// A literal separator string.
    Str(SmolStr),
    /// No separator: `read_until_sep` takes everything up to the end of input.
    End,
    Whitespace, // Matches space or tab
    /// A separator described by a `SepPattern`.
    Pattern(SepPattern),
}
57impl From<&str> for SepEnum {
58    fn from(value: &str) -> Self {
59        if value == "\\0" || value == "0" {
60            SepEnum::End
61        } else if value == "\\s" || value == "s" {
62            SepEnum::Str(" ".into())
63        } else if value == "\\t" || value == "t" {
64            SepEnum::Str("\t".into())
65        } else if value == "\\S" || value == "S" {
66            SepEnum::Whitespace
67        } else {
68            SepEnum::Str(value.into())
69        }
70    }
71}
72
73impl From<String> for SepEnum {
74    fn from(value: String) -> Self {
75        Self::from(value.as_str())
76    }
77}
78
79impl From<SmolStr> for SepEnum {
80    fn from(value: SmolStr) -> Self {
81        Self::from(value.as_str())
82    }
83}
84impl<T> Default for WplSepT<T> {
85    fn default() -> Self {
86        Self {
87            prio: 1,
88            cur_val: None,
89            ups_val: None,
90            infer: false,
91            is_take: true,
92            _phant: PhantomData,
93        }
94    }
95}
96
97impl<T: DefaultSep + Clone> WplSepT<T> {
98    /// 字段级分隔符(优先级 3),覆盖组级与上游
99    pub fn field_sep<S: Into<SmolStr>>(val: S) -> Self {
100        Self {
101            prio: 3,
102            cur_val: Some(SepEnum::from(val.into())),
103            ups_val: None,
104            infer: false,
105            is_take: true,
106            _phant: PhantomData,
107        }
108    }
109
110    pub fn apply_default(&mut self, other: WplSep) {
111        if other.prio > self.prio || self.cur_val.is_none() {
112            self.prio = other.prio;
113            self.cur_val = other.cur_val;
114            // Pattern separators do not support ups_val; clear it to avoid stale state.
115            if matches!(&self.cur_val, Some(SepEnum::Pattern(_))) {
116                self.ups_val = None;
117            }
118        }
119    }
120    pub fn set_current<S: Into<SmolStr>>(&mut self, sep: S) {
121        self.cur_val = Some(SepEnum::from(sep.into()))
122    }
123    pub fn is_unset(&self) -> bool {
124        self.cur_val().is_none()
125    }
126    pub fn is_to_end(&self) -> bool {
127        if let Some(x) = &self.cur_val {
128            *x == SepEnum::End
129        } else {
130            false
131        }
132    }
133    pub fn override_with(&mut self, other: &WplSep) {
134        if other.prio > self.prio {
135            self.prio = other.prio;
136            self.cur_val = other.cur_val.clone();
137            // Pattern separators do not support ups_val; clear it to avoid stale state.
138            if matches!(&self.cur_val, Some(SepEnum::Pattern(_))) {
139                self.ups_val = None;
140            }
141        }
142    }
143    pub fn sep_str(&self) -> &str {
144        if let Some(val) = &self.cur_val {
145            match val {
146                SepEnum::Str(str) => str.as_str(),
147                SepEnum::End => "\n",
148                SepEnum::Whitespace => " ", // Default to space for display
149                SepEnum::Pattern(p) => p.raw(),
150            }
151        } else {
152            T::sep_str()
153            //DEFAULT_SEP
154        }
155    }
156    pub fn inherited_sep<S: Into<SmolStr>>(val: S) -> Self {
157        Self {
158            prio: 1,
159            cur_val: Some(SepEnum::from(val.into())),
160            ups_val: None,
161            infer: false,
162            is_take: true,
163            ..Default::default()
164        }
165    }
166    pub fn infer_inherited_sep<S: Into<SmolStr>>(val: S) -> Self {
167        Self {
168            prio: 1,
169            cur_val: Some(SepEnum::from(val.into())),
170            ups_val: None,
171            infer: true,
172            is_take: true,
173            ..Default::default()
174        }
175    }
176    pub fn infer_group_sep<S: Into<SmolStr>>(val: S) -> Self {
177        Self {
178            prio: 2,
179            cur_val: Some(SepEnum::from(val.into())),
180            ups_val: None,
181            infer: true,
182            is_take: true,
183            ..Default::default()
184        }
185    }
186    pub fn infer_clone(&self) -> Self {
187        let mut c = self.clone();
188        c.infer = true;
189        c
190    }
191    pub fn group_sep<S: Into<SmolStr>>(val: S) -> Self {
192        Self {
193            prio: 2,
194            cur_val: Some(SepEnum::from(val.into())),
195            ups_val: None,
196            infer: false,
197            is_take: true,
198            ..Default::default()
199        }
200    }
201    pub fn field_sep_until<S: Into<SmolStr>>(val: S, sec: S, is_take: bool) -> Self {
202        Self {
203            prio: 3,
204            cur_val: Some(SepEnum::from(val.into())),
205            ups_val: Some(sec.into()),
206            infer: false,
207            is_take,
208            ..Default::default()
209        }
210    }
211    pub fn infer_field_sep<S: Into<SmolStr>>(val: S) -> Self {
212        Self {
213            prio: 3,
214            cur_val: Some(SepEnum::from(val.into())),
215            ups_val: None,
216            infer: true,
217            is_take: true,
218            ..Default::default()
219        }
220    }
221    pub fn field_sep_pattern(pattern: SepPattern) -> Self {
222        Self {
223            prio: 3,
224            cur_val: Some(SepEnum::Pattern(pattern)),
225            ups_val: None,
226            infer: false,
227            is_take: true,
228            _phant: PhantomData,
229        }
230    }
231    pub fn is_pattern(&self) -> bool {
232        matches!(&self.cur_val, Some(SepEnum::Pattern(_)))
233    }
234
235    pub fn consume_sep(&self, input: &mut &str) -> WResult<()> {
236        if self.is_take {
237            if let Some(SepEnum::Whitespace) = &self.cur_val {
238                // For Whitespace, accept either space or tab
239                alt((literal(" "), literal("\t")))
240                    .context(ctx_desc("take <whitespace>"))
241                    .parse_next(input)?;
242            } else if let Some(SepEnum::Pattern(pattern)) = &self.cur_val {
243                match pattern.match_at_start(input) {
244                    Some(m) => {
245                        *input = &input[m.consumed..];
246                    }
247                    None => {
248                        winnow::combinator::fail
249                            .context(ctx_desc("take <sep pattern>"))
250                            .parse_next(input)?;
251                    }
252                }
253            } else {
254                literal(self.sep_str())
255                    .context(ctx_desc("take <sep>"))
256                    .parse_next(input)?;
257            }
258        }
259        Ok(())
260    }
261    pub fn try_consume_sep(&self, input: &mut &str) -> WResult<()> {
262        if self.is_take {
263            if let Some(SepEnum::Whitespace) = &self.cur_val {
264                // For Whitespace, optionally accept either space or tab
265                opt(alt((literal(" "), literal("\t")))).parse_next(input)?;
266            } else if let Some(SepEnum::Pattern(pattern)) = &self.cur_val {
267                if let Some(m) = pattern.match_at_start(input) {
268                    *input = &input[m.consumed..];
269                }
270            } else {
271                opt(literal(self.sep_str())).parse_next(input)?;
272            }
273        }
274        Ok(())
275    }
276    pub fn is_space_sep(&self) -> bool {
277        !self.is_pattern() && self.sep_str() == " "
278    }
279
280    pub fn need_take_sep(&self) -> bool {
281        !(self.is_to_end() || self.is_space_sep())
282    }
283
284    pub fn read_until_any_char<'a>(end1: &str, end2: &str, data: &mut &'a str) -> WResult<&'a str> {
285        let ends1 = end1.as_bytes();
286        let ends2 = end2.as_bytes();
287        alt((
288            quot_r_str,
289            quot_str,
290            take_while(0.., |c: char| {
291                !(ends1.contains(&(c as u8)) || ends2.contains(&(c as u8)))
292            }),
293            take_to_end,
294        ))
295        .parse_next(data)
296    }
297
    /// Reads one field from `data`, stopping before the separator, and returns
    /// it as an owned `String`. The separator itself is left in `data`.
    ///
    /// Branches, in order: read-to-end, `Whitespace`, `Pattern`, separator
    /// plus secondary terminator (`ups_val`), and finally the plain separator.
    pub fn read_until_sep(&self, data: &mut &str) -> WResult<String> {
        // Read up to the current separator; if a secondary terminator (ups_val)
        // exists, cut at whichever terminator occurs first.
        // Special values: \0 is handled by is_to_end(); single-character pairs
        // use the read_until_any_char fast path.
        if self.is_to_end() {
            let buf = take_to_end.parse_next(data)?;
            return Ok(buf.to_string());
        }

        // Handle Whitespace separator specially
        if let Some(SepEnum::Whitespace) = &self.cur_val {
            // Take until space or tab
            let buf = take_while(0.., |c: char| c != ' ' && c != '\t').parse_next(data)?;
            return Ok(buf.to_string());
        }

        // Handle Pattern separator
        if let Some(SepEnum::Pattern(pattern)) = &self.cur_val {
            let s = *data;
            // Exclude quoted segments (consistent with existing logic)
            if s.starts_with('"') || s.starts_with("r#\"") || s.starts_with("r\"") {
                let buf = alt((quot_r_str, quot_str)).parse_next(data)?;
                return Ok(buf.to_string());
            }
            return match pattern.find(s) {
                Some((offset, _sep_match)) => {
                    let content = &s[..offset];
                    // Only advance past field content; leave the separator
                    // in the input stream for consume_sep to handle.
                    *data = &s[offset..];
                    Ok(content.to_string())
                }
                None => Ok(take_to_end.parse_next(data)?.to_string()),
            };
        }

        if let Some(ups) = &self.ups_val {
            // Fast path: single-character pair; a per-character scan naturally
            // stops at the nearest terminator.
            if self.sep_str().len() == 1 && ups.len() == 1 {
                let buf = Self::read_until_any_char(self.sep_str(), ups.as_str(), data)?;
                return Ok(buf.to_string());
            }
            // General case: nearest-terminator-first for multi-character separators.
            let s = *data;
            // If the input starts with a quote, parse it as a quoted string first,
            // consistent with existing behavior (for complex cases prefer protocol
            // parsers such as json/kv to avoid interference).
            if s.starts_with('"') || s.starts_with("r#\"") || s.starts_with("r\"") {
                // Parse quoted or raw strings as a whole so they are not split incorrectly.
                let buf = alt((quot_r_str, quot_str)).parse_next(data)?;
                return Ok(buf.to_string());
            }
            let p = s.find(self.sep_str());
            let q = s.find(ups.as_str());
            let idx = match (p, q) {
                (Some(i), Some(j)) => Some(i.min(j)),
                (Some(i), None) => Some(i),
                (None, Some(j)) => Some(j),
                (None, None) => None,
            };
            if let Some(i) = idx {
                let (left, right) = s.split_at(i);
                *data = right; // Consistent with take_until: the terminator itself is not consumed.
                return Ok(left.to_string());
            }
            let buf = take_to_end.parse_next(data)?;
            return Ok(buf.to_string());
        }
        // No secondary terminator: original semantics.
        let buf = alt((
            quot_r_str,
            quot_str,
            take_until(0.., self.sep_str()),
            take_to_end,
        ))
        .parse_next(data)?;
        Ok(buf.to_string())
    }
374    pub fn read_until_sep_repeat(&self, num: usize, data: &mut &str) -> WResult<String> {
375        // Pattern separators are not supported in repeat mode.
376        if self.is_pattern() {
377            return winnow::combinator::fail
378                .context(ctx_desc("sep pattern not supported in repeat mode"))
379                .parse_next(data);
380        }
381        let buffer: Vec<&str> = separated(
382            Range::from(num),
383            take_until(1.., self.sep_str()),
384            self.sep_str(),
385        )
386        .parse_next(data)?;
387
388        let msg = buffer.join(self.sep_str());
389        Ok(msg)
390    }
391}
392
393impl Display for WplFmt<&WplSep> {
394    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
395        if !self.0.infer {
396            if let Some(SepEnum::Pattern(p)) = &self.0.cur_val {
397                write!(f, "{{{}}}", p.raw())?;
398            } else {
399                for c in self.0.sep_str().chars() {
400                    if c != ' ' {
401                        write!(f, "\\{}", c)?;
402                    }
403                }
404            }
405        }
406        Ok(())
407    }
408}
409
410impl Display for GenFmt<&WplSep> {
411    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
412        if let Some(SepEnum::Pattern(p)) = &self.0.cur_val {
413            write!(f, "{{{}}}", p.raw())?;
414        } else {
415            write!(f, "{}", self.0.sep_str())?;
416        }
417        Ok(())
418    }
419}
420
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::syntax::sep_pattern::build_pattern;

    /// Builds a field-level pattern separator from raw pattern text.
    fn pattern_sep(raw: &str) -> WplSep {
        WplSep::field_sep_pattern(build_pattern(raw).unwrap())
    }

    /// Runs `read_until_sep` on `input`; returns the field and the remaining input.
    fn read_field<'a>(sep: &WplSep, input: &'a str) -> (String, &'a str) {
        let mut rest = input;
        let field = sep.read_until_sep(&mut rest).unwrap();
        (field, rest)
    }

    /// Runs `consume_sep` on `input`; returns the remaining input.
    fn consume<'a>(sep: &WplSep, input: &'a str) -> &'a str {
        let mut rest = input;
        sep.consume_sep(&mut rest).unwrap();
        rest
    }

    #[test]
    fn test_sep_enum_from_str() {
        let cases = [
            // \s -> space
            ("\\s", SepEnum::Str(" ".into())),
            ("s", SepEnum::Str(" ".into())),
            // \t -> tab
            ("\\t", SepEnum::Str("\t".into())),
            ("t", SepEnum::Str("\t".into())),
            // \S -> Whitespace
            ("\\S", SepEnum::Whitespace),
            ("S", SepEnum::Whitespace),
            // \0 -> End
            ("\\0", SepEnum::End),
            ("0", SepEnum::End),
            // Regular string
            (",", SepEnum::Str(",".into())),
        ];
        for (raw, expected) in cases {
            assert_eq!(SepEnum::from(raw), expected);
        }
    }

    #[test]
    fn test_whitespace_sep_read_until() {
        let sep = WplSep::field_sep("\\S");

        // Stops at a space.
        let (field, rest) = read_field(&sep, "hello world");
        assert_eq!(field, "hello");
        assert_eq!(rest, " world");

        // Stops at a tab.
        let (field, rest) = read_field(&sep, "hello\tworld");
        assert_eq!(field, "hello");
        assert_eq!(rest, "\tworld");
    }

    #[test]
    fn test_tab_sep_read_until() {
        let sep = WplSep::field_sep("\\t");
        let (field, rest) = read_field(&sep, "field1\tfield2\tfield3");
        assert_eq!(field, "field1");
        assert_eq!(rest, "\tfield2\tfield3");
    }

    #[test]
    fn test_whitespace_consume_sep() {
        let sep = WplSep::field_sep("\\S");
        // A space is consumed.
        assert_eq!(consume(&sep, " world"), "world");
        // A tab is consumed as well.
        assert_eq!(consume(&sep, "\tworld"), "world");
    }

    #[test]
    fn test_tab_consume_sep() {
        let sep = WplSep::field_sep("\\t");
        assert_eq!(consume(&sep, "\tfield2"), "field2");
    }

    // ── Pattern integration tests ────────────────────────────────────

    #[test]
    fn test_pattern_read_until_sep_literal() {
        let sep = pattern_sep("abc");
        let (field, rest) = read_field(&sep, "xyzabcdef");
        assert_eq!(field, "xyz");
        // Input stops AT the separator, not past it.
        assert_eq!(rest, "abcdef");
    }

    #[test]
    fn test_pattern_read_until_sep_glob() {
        let sep = pattern_sep("*=");
        let (field, rest) = read_field(&sep, "key=value");
        // Star non-greedy: "*=" → Star matches "key" (field content), "=" is separator.
        // Input stops AT the separator "=value".
        assert_eq!(field, "key");
        assert_eq!(rest, "=value");
    }

    #[test]
    fn test_pattern_read_until_sep_no_match() {
        let sep = pattern_sep("xyz");
        let (field, rest) = read_field(&sep, "abcdef");
        assert_eq!(field, "abcdef");
        assert_eq!(rest, "");
    }

    #[test]
    fn test_pattern_consume_sep() {
        let sep = pattern_sep("\\s=");
        assert_eq!(consume(&sep, "  =value"), "value");
    }

    #[test]
    fn test_pattern_try_consume_sep() {
        let sep = pattern_sep("\\s=");

        // When it matches, the separator is removed.
        let mut matching = " =value";
        sep.try_consume_sep(&mut matching).unwrap();
        assert_eq!(matching, "value");

        // When it doesn't match, the input is unchanged.
        let mut non_matching = "value";
        sep.try_consume_sep(&mut non_matching).unwrap();
        assert_eq!(non_matching, "value");
    }

    #[test]
    fn test_pattern_is_pattern() {
        let pattern = pattern_sep("abc");
        assert!(pattern.is_pattern());
        assert!(!pattern.is_space_sep());

        let literal = WplSep::field_sep(",");
        assert!(!literal.is_pattern());
    }

    #[test]
    fn test_pattern_display_wpl_fmt() {
        let sep = pattern_sep("*\\s(key=)");
        let rendered = format!("{}", WplFmt(&sep));
        assert_eq!(rendered, "{*\\s(key=)}");
    }

    #[test]
    fn test_pattern_display_gen_fmt() {
        let sep = pattern_sep("abc");
        let rendered = format!("{}", GenFmt(&sep));
        assert_eq!(rendered, "{abc}");
    }

    #[test]
    fn test_pattern_serde_roundtrip() {
        let sep = pattern_sep("*=");
        let json = serde_json::to_string(&sep).unwrap();
        let restored: WplSep = serde_json::from_str(&json).unwrap();
        assert_eq!(sep, restored);
    }

    #[test]
    fn test_pattern_preserve_read_until() {
        let sep = pattern_sep("*\\s(key=)");
        let (field, rest) = read_field(&sep, "hello  key=value");
        // Star matches "hello" (field content); input stops AT separator "  key=value".
        assert_eq!(field, "hello");
        assert_eq!(rest, "  key=value");
    }

    #[test]
    fn test_pattern_read_then_consume() {
        // Verify that read_until_sep followed by consume_sep round-trips correctly.

        // Literal pattern
        let sep = pattern_sep(",");
        let (field, rest) = read_field(&sep, "aaa,bbb");
        assert_eq!(field, "aaa");
        assert_eq!(rest, ",bbb");
        assert_eq!(consume(&sep, rest), "bbb");

        // Glob pattern with Star
        let sep = pattern_sep("*=");
        let (field, rest) = read_field(&sep, "key=value");
        assert_eq!(field, "key");
        assert_eq!(rest, "=value");
        assert_eq!(consume(&sep, rest), "value");

        // Whitespace glob pattern
        let sep = pattern_sep("\\s=");
        let (field, rest) = read_field(&sep, "key  =value");
        assert_eq!(field, "key");
        assert_eq!(rest, "  =value");
        assert_eq!(consume(&sep, rest), "value");
    }
}