yaml_peg/parser/base/
grammar.rs

1use super::*;
2use alloc::format;
3use core::cmp::Ordering;
4
/// The low-level grammar implementation for YAML.
///
/// These sub-parsers return `Result<R, PError>` and call
/// [`Parser::backward`] if mismatched.
9impl Parser<'_> {
    /// Match invisible boundaries and keep the gaps. (Must match once.)
    ///
    /// The next byte has to be one of `:`, `{`, `}`, `[`, `]`, space, `,`,
    /// `\n` or `\r`; otherwise the error from `sym_set` is returned.
    pub fn bound(&mut self) -> PResult<()> {
        self.sym_set(b":{}[] ,\n\r")?;
        // Step back over the byte that was just matched — presumably so the
        // boundary itself stays available to the caller; confirm against
        // `Parser::back`.
        self.back(1);
        // Then consume any non-newline whitespace that follows.
        self.ws(TakeOpt::More(0))
    }
16
17    /// Match complex mapping indicator (`?`).
18    pub fn complex_mapping(&mut self) -> PResult<()> {
19        self.sym(b'?')?;
20        self.bound()
21    }
22
23    fn num_prefix(&mut self) -> PResult<()> {
24        self.sym(b'-').unwrap_or_default();
25        self.take_while(u8::is_ascii_digit, TakeOpt::More(1))
26    }
27
    /// Match integer.
    ///
    /// Accepts the numeric prefix from `num_prefix`; when that prefix is
    /// exactly `0`, an octal (`o…`) or hexadecimal (`x…`) tail is also tried.
    /// Returns the matched text once trailing whitespace and a boundary
    /// (see [`Parser::bound`]) have been matched.
    pub fn int(&mut self) -> PResult<String> {
        self.num_prefix()?;
        let mut s = self.text();
        // Only a lone `0` can start a prefixed literal. Octal is tried first,
        // then hexadecimal; if either matched, re-read the text so that it
        // includes the tail.
        if s.as_bytes() == b"0" && self.context(|p| p.octal().is_ok() || p.hexadecimal().is_ok()) {
            s = self.text();
        }
        // NOTE(review): `float` and `sci_float` step back one byte after
        // consuming trailing whitespace before calling `bound`; this method
        // does not — confirm the difference is intended.
        self.ws(TakeOpt::More(0))?;
        self.bound()?;
        Ok(s)
    }
39
40    fn octal(&mut self) -> PResult<()> {
41        self.sym(b'o')?;
42        self.take_while(Self::ascii_digit(8), TakeOpt::More(1))
43    }
44
45    fn hexadecimal(&mut self) -> PResult<()> {
46        self.sym(b'x')?;
47        self.take_while(Self::ascii_digit(16), TakeOpt::More(1))
48    }
49
50    fn ascii_digit(i: u8) -> impl Fn(&u8) -> bool + 'static {
51        move |c| c.is_ascii_digit() || (*c > b'a' && *c < b'a' + i) || (*c > b'A' && *c < b'A' + i)
52    }
53
    /// Match float.
    ///
    /// The shape is the numeric prefix from `num_prefix`, a `.`, and a run of
    /// fraction digits taken with `TakeOpt::More(0)`. The returned text has
    /// every trailing `.0` removed, so `1.0` is returned as `1`.
    pub fn float(&mut self) -> PResult<String> {
        self.num_prefix()?;
        self.sym(b'.')?;
        self.take_while(u8::is_ascii_digit, TakeOpt::More(0))?;
        // Capture the number before any trailing whitespace is consumed.
        let s = self.text();
        // If trailing whitespace was consumed, step back one byte —
        // presumably so `bound` can match that whitespace as the boundary
        // (space is in its set); confirm against `Parser::count` / `back`.
        if self.count(|p| p.ws(TakeOpt::More(0)))? > 0 {
            self.back(1);
        }
        self.bound()?;
        // `trim_end_matches` strips the `.0` suffix repeatedly.
        Ok(s.trim_end_matches(".0").to_string())
    }
66
    /// Match float with scientific notation.
    ///
    /// The shape is the numeric prefix from `num_prefix`, an `e` or `E`, at
    /// most one `+`/`-` sign, and a run of exponent digits. The matched text
    /// is returned unchanged.
    pub fn sci_float(&mut self) -> PResult<String> {
        // NOTE(review): the mantissa is matched by `num_prefix` only, so a
        // fractional mantissa such as `1.5e3` does not appear to be accepted
        // here — confirm whether callers handle that form elsewhere.
        self.num_prefix()?;
        self.sym_set(b"eE")?;
        // Optional exponent sign: zero or one of `+` / `-`.
        self.take_while(Self::is_in(b"+-"), TakeOpt::Range(0, 1))?;
        self.take_while(u8::is_ascii_digit, TakeOpt::More(1))?;
        // Capture the number before any trailing whitespace is consumed.
        let s = self.text();
        // If trailing whitespace was consumed, step back one byte —
        // presumably so `bound` can match that whitespace as the boundary;
        // confirm against `Parser::count` / `back`.
        if self.count(|p| p.ws(TakeOpt::More(0)))? > 0 {
            self.back(1);
        }
        self.bound()?;
        Ok(s)
    }
80
81    /// Match quoted string.
82    pub fn string_quoted(&mut self, sym: u8, ignore: &[u8]) -> PResult<String> {
83        self.context(|p| {
84            p.sym(sym)?;
85            p.forward();
86            let mut v = String::new();
87            p.ws(TakeOpt::More(0))?;
88            v.push_str(&p.text());
89            loop {
90                p.forward();
91                p.take_while(Self::not_in(&[b'\n', b'\r', b'\\', sym]), TakeOpt::More(0))?;
92                v.push_str(&p.text());
93                p.forward();
94                if p.sym_seq(ignore).is_ok() {
95                    v.push(char::from(sym));
96                } else if let Ok(mut t) = p.gap(false) {
97                    if v.ends_with('\\') {
98                        t -= 1;
99                    }
100                    match t.cmp(&1) {
101                        Ordering::Less => {}
102                        Ordering::Equal => {
103                            v.truncate(v.trim_end().len());
104                            // Manual wrapping
105                            if !v.ends_with("\\n") {
106                                v.push(' ');
107                            }
108                        }
109                        Ordering::Greater => {
110                            for _ in 0..t - 1 {
111                                v.push('\n');
112                            }
113                        }
114                    }
115                    // Remove leading space
116                    p.ws(TakeOpt::More(0))?;
117                } else if p.sym(b'\\').is_ok() {
118                    v.push('\\');
119                } else if p.sym(sym).is_ok() {
120                    break;
121                }
122            }
123            Ok(v)
124        })
125    }
126
    /// Match plain string.
    ///
    /// * `level` is the indentation level passed to `ind` when the scalar
    ///   continues on a following line.
    /// * `inner` adds `,` to the stop bytes and disables the branch that
    ///   appends bracket/brace bytes to the scalar.
    ///
    /// Trailing whitespace is trimmed from the result; an empty result is
    /// reported as [`PError::Mismatch`].
    pub fn string_plain(&mut self, level: usize, inner: bool) -> PResult<String> {
        // Bytes that interrupt a run of plain text.
        let mut patt = b"[]{}: \n\r".to_vec();
        if inner {
            patt.push(b',');
        }
        self.context(|p| {
            let mut v = String::new();
            // Set after a line break has been folded; cleared when a
            // bracket/brace byte is appended.
            let mut is_leading = false;
            loop {
                p.forward();
                // Append the next run of bytes not in `patt`.
                p.take_while(Self::not_in(&patt), TakeOpt::More(0))?;
                v.push_str(&p.text());
                p.forward();
                // The scalar ends at end of input, before `: `, before `:`
                // followed by a newline, or before ` #`.
                if p.food().is_empty()
                    || p.sym_seq(b": ").is_ok()
                    || (p.sym(b':').is_ok() && p.nl().is_ok())
                    || p.sym_seq(b" #").is_ok()
                {
                    // Presumably rewinds to the checkpoint so the terminator
                    // is left unconsumed — confirm against `Parser::backward`.
                    p.backward();
                    break;
                }
                p.forward();
                if p.sym_set(b": ").is_ok() {
                    // Remove leading space
                    if is_leading && p.text() == " " {
                        v.truncate(v.trim_end().len());
                    }
                    v.push_str(&p.text());
                } else if !inner && !v.is_empty() && p.sym_set(b"{}[]").is_ok() {
                    // Bracket/brace bytes are part of the text when not
                    // `inner` and the scalar has already started.
                    v.push_str(&p.text());
                    is_leading = false;
                } else if p.ind(level).is_err() {
                    if let Ok(t) = p.gap(true) {
                        // Fold line breaks: one break becomes a space,
                        // `t` breaks become `t - 1` newlines.
                        if t == 1 {
                            v.push(' ');
                        }
                        for _ in 0..t - 1 {
                            v.push('\n');
                        }
                        // The continuation line must match the indentation
                        // for `level`, otherwise the scalar ends here.
                        if p.ind(level).is_err() {
                            break;
                        }
                    } else {
                        break;
                    }
                    is_leading = true;
                }
            }
            v.truncate(v.trim_end().len());
            if v.is_empty() {
                Err(PError::Mismatch)
            } else {
                Ok(v)
            }
        })
    }
184
185    /// Match literal string.
186    pub fn string_literal(&mut self, level: usize) -> PResult<String> {
187        self.sym(b'|')?;
188        let chomp = self.chomp();
189        self.ws(TakeOpt::More(0))?;
190        let s = self.string_wrapped(level, b'\n', true)?;
191        Ok(chomp(s))
192    }
193
194    /// Match folded string.
195    pub fn string_folded(&mut self, level: usize) -> PResult<String> {
196        self.sym(b'>')?;
197        let chomp = self.chomp();
198        self.ws(TakeOpt::More(0))?;
199        let s = self.string_wrapped(level, b' ', false)?;
200        Ok(chomp(s))
201    }
202
203    /// Match string chomping option.
204    pub fn chomp(&mut self) -> impl Fn(String) -> String {
205        self.context(|p| {
206            if p.sym(b'-').is_ok() {
207                |s: String| s.trim_end().to_string()
208            } else if p.sym(b'+').is_ok() {
209                |s| s
210            } else {
211                |s: String| s.trim_end().to_string() + "\n"
212            }
213        })
214    }
215
    /// Match wrapped string.
    ///
    /// Collects the indented lines that follow a block scalar header.
    ///
    /// * `level` is the indentation level each content line must match
    ///   (checked with `ind`).
    /// * `sep` is the byte inserted between consecutive lines.
    /// * `leading` set to `true` keeps each line as is and always inserts
    ///   `sep` between lines; `false` strips leading whitespace from each
    ///   line and skips `sep` when the collected text already ends with
    ///   whitespace.
    ///
    /// The returned text always ends with an extra `\n`.
    pub fn string_wrapped(&mut self, level: usize, sep: u8, leading: bool) -> PResult<String> {
        self.context(|p| {
            let mut v = String::new();
            loop {
                // Every line, including the header line, must end with a
                // newline; a failure here is propagated to the caller.
                // NOTE(review): this looks like it also fails when the input
                // ends right after the last content line — confirm.
                p.nl()?;
                p.forward();
                if p.ind(level).is_err() {
                    // Not indented enough: either blank lines follow (each
                    // break counted by `gap` adds a `\n`) or the block ends.
                    if let Ok(t) = p.gap(false) {
                        for _ in 0..t {
                            v.push('\n');
                        }
                        if p.ind(level).is_err() {
                            break;
                        }
                    } else {
                        break;
                    }
                }
                p.forward();
                // Take the rest of the line.
                p.take_while(Self::not_in(b"\n\r"), TakeOpt::More(0))?;
                let s = p.text();
                if leading {
                    if !v.is_empty() {
                        v.push(char::from(sep));
                    }
                    v.push_str(&s);
                } else {
                    let s = s.trim_start();
                    if !v.is_empty() && !v.ends_with(char::is_whitespace) {
                        v.push(char::from(sep));
                    }
                    v.push_str(s);
                }
            }
            // Keep the last wrap
            p.back(1);
            Ok(v + "\n")
        })
    }
256
257    /// Match an escaped string, return unescaped string.
258    pub fn escape(doc: &str) -> String {
259        let mut s = String::new();
260        let mut b = false;
261        for c in doc.chars() {
262            if c == '\\' && !b {
263                b = true;
264                continue;
265            }
266            s.push(match c {
267                '\\' if b => '\\',
268                'n' if b => '\n',
269                'r' if b => '\r',
270                't' if b => '\t',
271                'b' if b => '\x08',
272                'f' if b => '\x0C',
273                c => c,
274            });
275            b = false;
276        }
277        s
278    }
279
280    /// Match valid YAML identifier.
281    pub fn identifier(&mut self) -> PResult<()> {
282        self.take_while(u8::is_ascii_alphanumeric, TakeOpt::One)?;
283        self.take_while(
284            |c| c.is_ascii_alphanumeric() || *c == b'-',
285            TakeOpt::More(0),
286        )
287    }
288
289    /// Match tags.
290    pub fn tag(&mut self) -> PResult<String> {
291        self.sym(b'!')?;
292        self.context(|p| {
293            p.identifier().unwrap_or_default();
294            let tag = p.text();
295            let prefix = if !tag.is_empty() {
296                if p.sym(b'!').is_ok() {
297                    // Tag prefix variable
298                    p.tag[&tag].clone()
299                } else {
300                    String::new()
301                }
302            } else if p.sym(b'<').is_ok() {
303                // Full tag
304                let tag = p.context(|p| {
305                    p.take_while(Self::not_in(b" <>\n\r"), TakeOpt::More(1))?;
306                    Ok(p.text())
307                })?;
308                p.sym(b'>')?;
309                tag
310            } else if p.sym(b'!').is_ok() {
311                p.tag["!!"].clone()
312            } else {
313                p.tag["!"].clone()
314            };
315            let doc = p.context(|p| {
316                if p.identifier().is_ok() {
317                    p.text()
318                } else {
319                    String::new()
320                }
321            });
322            Ok(format!("{prefix}{doc}"))
323        })
324    }
325
326    /// Match anchor definition.
327    pub fn anchor(&mut self) -> PResult<String> {
328        self.sym(b'&')?;
329        self.context(|p| {
330            p.identifier()?;
331            Ok(p.text())
332        })
333    }
334
335    /// Match anchor used.
336    pub fn anchor_use(&mut self) -> PResult<String> {
337        self.sym(b'*')?;
338        self.context(|p| {
339            p.identifier()?;
340            Ok(p.text())
341        })
342    }
343
344    /// Match any invisible characters except newline.
345    pub fn ws(&mut self, opt: TakeOpt) -> PResult<()> {
346        self.take_while(
347            |c| c.is_ascii_whitespace() && *c != b'\n' && *c != b'\r',
348            opt,
349        )
350    }
351
352    /// Match newline characters.
353    pub fn nl(&mut self) -> PResult<()> {
354        self.context(|p| {
355            (p.sym_seq(b"\r\n").is_ok()
356                || p.sym_seq(b"\n\r").is_ok()
357                || p.sym(b'\n').is_ok()
358                || p.sym(b'\r').is_ok())
359            .then_some(())
360            .ok_or(PError::Mismatch)
361        })
362    }
363
364    /// Match any invisible characters.
365    pub fn inv(&mut self, opt: TakeOpt) -> PResult<()> {
366        self.take_while(u8::is_ascii_whitespace, opt)
367    }
368
369    /// Match and define new indent size.
370    pub fn ind_define(&mut self, level: usize) -> PResult<()> {
371        if level > 0 {
372            self.ind(level - 1)?;
373        }
374        let ind = self.count(|p| p.take_while(|c| c.is_ascii_whitespace(), TakeOpt::More(0)))?;
375        if level == self.indent.len() {
376            self.indent.push(ind);
377        } else {
378            self.indent[level] = ind;
379        }
380        Ok(())
381    }
382
    /// Match any optional invisible characters between two lines.
    ///
    /// Set `cmt` to `true` to ignore comments at the line end.
    ///
    /// At least one newline is required. Returns the number of newlines
    /// matched, so the value is always `>= 1` on success.
    pub fn gap(&mut self, cmt: bool) -> PResult<usize> {
        self.context(|p| {
            if cmt {
                // A comment is optional; a failed match is ignored.
                p.comment().unwrap_or_default();
            }
            p.nl()?;
            let mut t = 1;
            loop {
                // Check point
                p.forward();
                p.ws(TakeOpt::More(0))?;
                if cmt {
                    p.comment().unwrap_or_default();
                }
                if p.nl().is_err() {
                    // This line has content. Presumably rewinds to the
                    // checkpoint so its leading whitespace is left for the
                    // caller — confirm against `Parser::backward`.
                    p.backward();
                    return Ok(t);
                }
                t += 1;
            }
        })
    }
408
409    /// Match comment.
410    pub fn comment(&mut self) -> PResult<()> {
411        self.ws(TakeOpt::More(0))?;
412        self.sym(b'#')?;
413        self.take_while(Self::not_in(b"\n\r"), TakeOpt::More(0))
414    }
415}