1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
use super::*;
use alloc::{string::ToString, vec};
use ritelinked::LinkedHashMap;

mod directive;
mod grammar;

/// The option of [`Parser::take_while`].
///
/// Selects how many matching bytes must be taken and how many may be taken.
pub enum TakeOpt {
    /// Match exactly once.
    One,
    /// Match in range, as `Range(min, max)`. Same as regex `{a,b}`.
    /// `Range(0, 1)` is same as regex `?`.
    Range(usize, usize),
    /// Match until mismatched, requiring at least the given number of matches.
    /// `More(0)` is same as regex `*`, and `More(1)` is same as regex `+`.
    More(usize),
}

/// Basic greedy parser with YAML syntax.
///
/// Its methods are the sub-parsers of the syntax.
///
/// Two cursors track the progress over the document: `pos` is where matching happens,
/// and `eaten` is the checkpoint that [`Parser::backward`] returns to.
pub struct Parser<'a> {
    /// The document bytes being parsed.
    doc: &'a [u8],
    /// Indentation width of each nesting level, summed up by [`Parser::ind`].
    indent: Vec<usize>,
    /// Running total added by [`Parser::consume`], used as the offset in [`Parser::indicator`].
    consumed: u64,
    /// NOTE(review): presumably set once a version directive has been handled;
    /// it is only initialized to `false` in this part of the file, confirm in `directive`.
    pub(crate) version_checked: bool,
    /// Tag table, created with the `!` and `!!` entries by [`Default`].
    pub(crate) tag: LinkedHashMap<String, String>,
    /// Current position.
    pub pos: usize,
    /// Read position.
    pub eaten: usize,
}

/// The default parser has an empty document, one indent level of width zero,
/// both cursors at zero, and the `!` and `!!` entries in its tag table.
impl Default for Parser<'_> {
    fn default() -> Self {
        let mut handles = LinkedHashMap::new();
        handles.insert(String::from("!"), String::new());
        handles.insert(String::from("!!"), tag_prefix!().to_string());
        Self {
            tag: handles,
            doc: b"",
            indent: vec![0],
            version_checked: false,
            consumed: 0,
            eaten: 0,
            pos: 0,
        }
    }
}

/// The implementation of string pointer.
impl<'a> Parser<'a> {
    /// Create a parser with the string.
    pub fn new(doc: &'a [u8]) -> Self {
        Self::default().with_doc(doc)
    }

    /// Replace the document of the parser and hand the parser back.
    ///
    /// The cursors and all other states are kept as they are.
    pub fn with_doc(mut self, doc: &'a [u8]) -> Self {
        self.doc = doc;
        self
    }

    /// Borrow the unread remainder of the document, from the current position to the end.
    ///
    /// # Panics
    ///
    /// Panics if the current position is past the end of the document.
    pub fn food(&self) -> &'a [u8] {
        let doc: &'a [u8] = self.doc;
        &doc[self.pos..]
    }

    /// Copy out the bytes between the eaten cursor and the current position as a string.
    ///
    /// Invalid UTF-8 is replaced lossily. The result is empty when the eaten cursor
    /// is not behind the current position.
    pub fn text(&mut self) -> String {
        if self.eaten >= self.pos {
            return String::new();
        }
        let raw = &self.doc[self.eaten..self.pos];
        String::from_utf8_lossy(raw).into_owned()
    }
}

/// The low level grammar implementation.
///
/// These sub-parsers return `Result<(), PError>` and call [`Parser::backward`] on a mismatch.
impl Parser<'_> {
    /// Move both cursors to `pos` and hand the parser back.
    ///
    /// The value is stored as is; no bounds or boundary check is done here.
    pub fn pos(mut self, pos: usize) -> Self {
        self.pos = pos;
        self.eaten = pos;
        self
    }

    /// Get the indicator: the consumed total plus the current position.
    pub fn indicator(&self) -> u64 {
        let local = self.pos as u64;
        self.consumed + local
    }

    /// Shortcut to build a [`PError::Terminate`] error named `name`.
    ///
    /// The message is generated from the document and the current indicator.
    pub fn err<R>(&self, name: &'static str) -> PResult<R> {
        let msg = indicated_msg(self.doc, self.indicator());
        Err(PError::Terminate { name, msg })
    }

    /// Add the current position to the consumed total, then reset both cursors to zero.
    pub fn consume(&mut self) {
        self.consumed += self.pos as u64;
        self.pos = 0;
        self.eaten = 0;
    }

    /// Accept what has been matched: move the eaten cursor up to the current position.
    pub fn forward(&mut self) {
        let here = self.pos;
        self.eaten = here;
    }

    /// Give up what has been matched: move the current position back to the eaten cursor.
    pub fn backward(&mut self) {
        let checkpoint = self.eaten;
        self.pos = checkpoint;
    }

    /// Move the current position back by `n` bytes.
    ///
    /// The eaten cursor is not touched. `n` must not be larger than the current position,
    /// otherwise the subtraction underflows.
    pub fn back(&mut self, n: usize) {
        self.pos = self.pos - n;
    }

    /// Match symbol.
    pub fn sym(&mut self, s: u8) -> PResult<()> {
        self.sym_set(&[s])
    }

    /// Match exactly one byte that is a member of the set `s`.
    pub fn sym_set(&mut self, s: &[u8]) -> PResult<()> {
        self.take_while(|c| s.contains(c), TakeOpt::One)
    }

    /// Match the bytes of `s` one after another, stopping at the first mismatch.
    pub fn sym_seq(&mut self, s: &[u8]) -> PResult<()> {
        s.iter().try_for_each(|&byte| self.sym(byte))
    }

    /// Match bytes greedily while the condition `f` holds.
    ///
    /// The argument `opt` decides how many bytes must be taken and how many may be taken:
    ///
    /// - [`TakeOpt::One`] takes exactly one byte.
    /// - [`TakeOpt::Range`] takes at least the first and at most the second number of bytes.
    ///   An upper bound of zero takes nothing, so `Range(0, 0)` always succeeds.
    /// - [`TakeOpt::More`] takes at least the given number of bytes, without upper bound.
    ///
    /// On success the current position is moved behind the taken bytes.
    ///
    /// # Errors
    ///
    /// Returns [`PError::Mismatch`] if fewer bytes than required are matched.
    /// The current position is then reset by [`Parser::backward`].
    pub fn take_while<F>(&mut self, f: F, opt: TakeOpt) -> PResult<()>
    where
        F: Fn(&u8) -> bool,
    {
        let (min, max) = match opt {
            TakeOpt::One => (1, 1),
            TakeOpt::Range(min, max) => (min, max),
            TakeOpt::More(min) => (min, usize::MAX),
        };
        // The scan is capped BEFORE a byte is tested, so an upper bound of zero takes
        // nothing instead of running past the limit.
        let matched = self.food().iter().take(max).take_while(|c| f(*c)).count();
        if matched < min {
            self.backward();
            Err(PError::Mismatch)
        } else {
            self.pos += matched;
            Ok(())
        }
    }

    /// Run the sub-parser `f` and report how far the current position has advanced.
    ///
    /// An error from `f` is passed through unchanged.
    pub fn count<F, R>(&mut self, f: F) -> PResult<usize>
    where
        F: FnOnce(&mut Self) -> PResult<R>,
    {
        let start = self.pos;
        f(self).map(|_| self.pos - start)
    }

    /// Run `f` with a local checkpoint.
    ///
    /// The eaten cursor is set to the current position while `f` runs, and restored to its
    /// former value afterwards. The current position stays wherever `f` left it.
    pub fn context<F, R>(&mut self, f: F) -> R
    where
        F: FnOnce(&mut Self) -> R,
    {
        let checkpoint = self.eaten;
        self.eaten = self.pos;
        let out = f(self);
        self.eaten = checkpoint;
        out
    }

    /// A SET detector: the returned condition holds for the bytes listed in `s`.
    pub fn is_in(s: &[u8]) -> impl Fn(&u8) -> bool + '_ {
        move |c| s.contains(c)
    }

    /// A NOT detector: the returned condition holds for the bytes that are not listed in `s`.
    pub fn not_in(s: &[u8]) -> impl Fn(&u8) -> bool + '_ {
        move |c| s.iter().all(|member| member != c)
    }

    /// Match the indentation of the nesting level `level`.
    ///
    /// The indent table is first fitted to `level + 1` entries: missing levels get the
    /// width 2, and the settings of deeper levels are dropped. Then one space is matched
    /// for each unit of the summed widths.
    pub fn ind(&mut self, level: usize) -> PResult<()> {
        // `resize` pads the new levels and clears the old deeper ones in one step
        self.indent.resize(level + 1, 2);
        let width: usize = self.indent.iter().sum();
        (0..width).try_for_each(|_| self.sym(b' '))
    }
}