efx_core/ast/
parser.rs

1use crate::ast::PResult;
2use crate::ast::error::ParseError;
3use crate::ast::lexer::Lexer;
4use crate::ast::nodes::{Attr, Element, Interpolation, Node, Text};
5use crate::ast::span_range::SpanRange;
6use crate::ast::tok::Tok;
7
/// Parser that walks a lexed token stream and builds AST nodes from it.
pub struct Parser<'a> {
    /// Tokens paired with their source spans, as returned by the lexer in `new`.
    toks: Vec<(Tok, SpanRange)>,
    /// Index into `toks` of the token currently under the cursor.
    i: usize,
    /// The original source text; stored at construction but not read by the
    /// parsing methods in this file section.
    _src: &'a str,
}
13
14impl<'a> Parser<'a> {
15    pub fn new(src: &'a str) -> Self {
16        Self {
17            toks: Lexer::new(src).all(),
18            i: 0,
19            _src: src,
20        }
21    }
22
23    pub fn parse_nodes(&mut self) -> PResult<Vec<Node>> {
24        let mut nodes = Vec::new();
25        while !self.eof() {
26            match self.cur() {
27                Some((Tok::LAngle, _)) => nodes.push(self.parse_element()?),
28                Some((Tok::LBrace, _)) => nodes.push(self.parse_i11n()?),
29                Some((Tok::Text(_), _)) => nodes.push(self.parse_text()?),
30                Some((tok, sp)) => {
31                    return Err(ParseError {
32                        msg: format!("unexpected token: {:?}", tok),
33                        span: *sp,
34                    });
35                }
36                None => break,
37            }
38        }
39
40        Ok(nodes)
41    }
42
43    fn eof(&self) -> bool {
44        self.i >= self.toks.len()
45    }
    /// Returns the token (and its span) under the cursor, or `None` when the
    /// cursor is past the end of the token list.
    fn cur(&self) -> Option<&(Tok, SpanRange)> {
        self.toks.get(self.i)
    }
    /// Advances the cursor by one token. No bounds check is done here; `cur`
    /// simply yields `None` once the cursor is past the end.
    fn bump(&mut self) {
        self.i += 1;
    }
52
53    fn byte_end(&self) -> usize {
54        self.toks.last().map(|t| (t.1.end).0).unwrap_or(0)
55    }
56
57    fn expect(&mut self, want: Tok) -> PResult<SpanRange> {
58        if let Some((tok, sp)) = self.cur().cloned() {
59            if tok == want {
60                self.bump();
61                return Ok(sp);
62            }
63        }
64
65        let span = self
66            .cur()
67            .map(|t| t.1)
68            .unwrap_or_else(|| SpanRange::new(self.byte_end(), self.byte_end()));
69        Err(ParseError {
70            msg: format!("expected {:?}", want),
71            span,
72        })
73    }
74
75    fn parse_name(&mut self) -> PResult<(String, SpanRange)> {
76        match self.cur().cloned() {
77            Some((Tok::Text(s), sp)) => {
78                let mut name_end = s.len();
79                for (idx, ch) in s.char_indices() {
80                    if ch.is_whitespace() || matches!(ch, '<' | '>' | '/' | '{' | '}') {
81                        name_end = idx;
82                        break;
83                    }
84                }
85                if name_end == 0 {
86                    return Err(ParseError {
87                        msg: "invalid tag name".into(),
88                        span: sp,
89                    });
90                }
91
92                let name = s[..name_end].to_string();
93
94                self.bump();
95
96                let remainder = &s[name_end..];
97                if !remainder.is_empty() {
98                    let rem_start = sp.start.0 + name_end;
99                    // Insert directly at the current position of self.i - the next call to cur() will see it
100                    self.toks.insert(
101                        self.i,
102                        (
103                            Tok::Text(remainder.to_string()),
104                            SpanRange::new(rem_start, sp.end.0),
105                        ),
106                    );
107                }
108
109                let name_span = SpanRange::new(sp.start.0, sp.start.0 + name_end);
110                Ok((name, name_span))
111            }
112            Some((_tok, sp)) => Err(ParseError {
113                msg: "expected tag name".into(),
114                span: sp,
115            }),
116            None => Err(ParseError {
117                msg: "unexpected EOF when reading tag name".into(),
118                span: SpanRange::new(self.byte_end(), self.byte_end()),
119            }),
120        }
121    }
122
123    fn parse_element(&mut self) -> PResult<Node> {
124        let start = self.expect(Tok::LAngle)?.start;
125        let (name, _nsp) = self.parse_name()?;
126
127        // Collect the "tail" after the name until '>' or '/>'
128        let mut attrs_src = String::new();
129        let mut attrs_span_start = None;
130        let mut attrs_span_end = None;
131
132        loop {
133            match self.cur() {
134                Some((Tok::Slash, _)) => {
135                    return if let Some((Tok::RAngle, sp_gt)) = self.toks.get(self.i + 1).cloned() {
136                        self.bump(); // '/'
137                        self.bump(); // '>'
138
139                        let end = sp_gt.end;
140                        let attrs = parse_attrs_from_buffer(
141                            &attrs_src,
142                            SpanRange {
143                                start: attrs_span_start.unwrap_or(start),
144                                end: attrs_span_end.unwrap_or(start),
145                            },
146                        )?;
147                        Ok(Node::Element(Element {
148                            name,
149                            attrs,
150                            children: vec![],
151                            span: SpanRange { start, end },
152                        }))
153                    } else {
154                        let sp = self.cur().unwrap().1;
155                        Err(ParseError {
156                            msg: "unexpected '/' in tag head".into(),
157                            span: sp,
158                        })
159                    };
160                }
161                Some((Tok::RAngle, sp_gt)) => {
162                    // Closed the opening tag: now parse the children or end empty
163                    let end_open = *sp_gt;
164                    self.bump();
165
166                    let attrs = parse_attrs_from_buffer(
167                        &attrs_src,
168                        SpanRange {
169                            start: attrs_span_start.unwrap_or(start),
170                            end: attrs_span_end.unwrap_or(end_open.end),
171                        },
172                    )?;
173
174                    let mut children = Vec::new();
175                    loop {
176                        match self.cur() {
177                            Some((Tok::LAngle, _)) => {
178                                if let Some((Tok::Slash, _)) = self.toks.get(self.i + 1) {
179                                    // </name>
180                                    self.bump(); // '<'
181                                    self.bump(); // '/'
182                                    let (close_name, sp_name) = self.parse_name()?;
183                                    if close_name != name {
184                                        return Err(ParseError {
185                                            msg: format!(
186                                                "unmatched closing tag: expected </{}>",
187                                                name
188                                            ),
189                                            span: sp_name,
190                                        });
191                                    }
192                                    let end_angle = self.expect(Tok::RAngle)?;
193                                    return Ok(Node::Element(Element {
194                                        name,
195                                        attrs,
196                                        children,
197                                        span: SpanRange {
198                                            start,
199                                            end: end_angle.end,
200                                        },
201                                    }));
202                                } else {
203                                    children.push(self.parse_element()?);
204                                }
205                            }
206                            Some((Tok::LBrace, _)) => children.push(self.parse_i11n()?),
207                            Some((Tok::Text(_), _)) => children.push(self.parse_text()?),
208                            Some((tok, sp)) => {
209                                return Err(ParseError {
210                                    msg: format!("unexpected token in element body: {:?}", tok),
211                                    span: *sp,
212                                });
213                            }
214                            None => {
215                                return Err(ParseError {
216                                    msg: "unexpected EOF in element body".into(),
217                                    span: SpanRange::new(self.byte_end(), self.byte_end()),
218                                });
219                            }
220                        }
221                    }
222                }
223                Some((Tok::Text(s), sp)) => {
224                    // There can be spaces and attributes between the name and '>'
225                    if attrs_span_start.is_none() {
226                        attrs_span_start = Some(sp.start);
227                    }
228                    attrs_span_end = Some(sp.end);
229                    attrs_src.push_str(s);
230                    self.bump();
231                }
232                Some((tok, sp)) => {
233                    return Err(ParseError {
234                        msg: format!("unexpected token in tag head: {:?}", tok),
235                        span: *sp,
236                    });
237                }
238                None => {
239                    return Err(ParseError {
240                        msg: "unexpected EOF in tag head".into(),
241                        span: SpanRange::new(self.byte_end(), self.byte_end()),
242                    });
243                }
244            }
245        }
246    }
247
248    fn parse_text(&mut self) -> PResult<Node> {
249        let mut start = None;
250        let mut end = None;
251        let mut buf = String::new();
252
253        while let Some((Tok::Text(s), sp)) = self.cur().cloned() {
254            if start.is_none() {
255                start = Some(sp.start);
256            }
257            end = Some(sp.end);
258            self.bump();
259            buf.push_str(&s);
260        }
261
262        if let (Some(st), Some(en)) = (start, end) {
263            Ok(Node::Text(Text {
264                value: buf,
265                span: SpanRange { start: st, end: en },
266            }))
267        } else {
268            let span = self
269                .cur()
270                .map(|t| t.1)
271                .unwrap_or_else(|| SpanRange::new(self.byte_end(), self.byte_end()));
272            Err(ParseError {
273                msg: "expected text".into(),
274                span,
275            })
276        }
277    }
278
279    fn parse_i11n(&mut self) -> PResult<Node> {
280        let start = self.expect(Tok::LBrace)?.start;
281        // Read everything up to a single RBrace (}} was already converted to Text("}"))
282        let mut expr_src = String::new();
283
284        loop {
285            match self.cur().cloned() {
286                Some((Tok::RBrace, sp)) => {
287                    self.bump();
288                    return Ok(Node::I11n(Interpolation {
289                        expr_src,
290                        span: SpanRange { start, end: sp.end },
291                    }));
292                }
293                Some((Tok::Text(s), _)) => {
294                    expr_src.push_str(&s);
295                    self.bump();
296                }
297                Some((Tok::LBrace, sp)) => {
298                    // Nested { } are not supported (let's simplify the first version)
299                    return Err(ParseError {
300                        msg: "nested '{' in interpolation".into(),
301                        span: sp,
302                    });
303                }
304                Some((_tok, sp)) => {
305                    return Err(ParseError {
306                        msg: "unexpected token inside { }".into(),
307                        span: sp,
308                    });
309                }
310                None => {
311                    return Err(ParseError {
312                        msg: "unexpected EOF in { }".into(),
313                        span: SpanRange::new(self.byte_end(), self.byte_end()),
314                    });
315                }
316            }
317        }
318    }
319}
320
321fn parse_attrs_from_buffer(buf: &str, span: SpanRange) -> PResult<Vec<Attr>> {
322    let mut attrs = Vec::new();
323    let bytes = buf.as_bytes();
324    let mut i = 0usize;
325
326    let skip_ws = |b: &[u8], mut j: usize| {
327        while j < b.len() && b[j].is_ascii_whitespace() {
328            j += 1;
329        }
330        j
331    };
332    let is_name_char =
333        |c: u8| -> bool { c.is_ascii_alphanumeric() || c == b'_' || c == b'-' || c == b':' };
334
335    i = skip_ws(bytes, i);
336    while i < bytes.len() {
337        let name_start = i;
338        while i < bytes.len() && is_name_char(bytes[i]) {
339            i += 1;
340        }
341        if i == name_start {
342            // no name - maybe only spaces are left
343            i = skip_ws(bytes, i);
344            if i >= bytes.len() {
345                break;
346            }
347
348            return Err(ParseError {
349                msg: "invalid attribute name".into(),
350                span,
351            });
352        }
353
354        let name = &buf[name_start..i];
355        i = skip_ws(bytes, i);
356        if i >= bytes.len() || bytes[i] != b'=' {
357            return Err(ParseError {
358                msg: format!("expected '=' after attribute '{}'", name),
359                span,
360            });
361        }
362        i += 1;
363        i = skip_ws(bytes, i);
364
365        if i >= bytes.len() || (bytes[i] != b'"' && bytes[i] != b'\'') {
366            return Err(ParseError {
367                msg: format!("expected quote after '=' in attribute '{}'", name),
368                span,
369            });
370        }
371        let quote = bytes[i];
372        i += 1;
373        let val_start = i;
374        while i < bytes.len() && bytes[i] != quote {
375            i += 1;
376        }
377        if i >= bytes.len() {
378            return Err(ParseError {
379                msg: format!("unterminated quoted value for attribute '{}'", name),
380                span,
381            });
382        }
383        let value = &buf[val_start..i];
384        i += 1; // closing quote
385
386        attrs.push(Attr {
387            name: name.to_string(),
388            value: value.to_string(),
389            span,
390        });
391
392        i = skip_ws(bytes, i);
393    }
394
395    Ok(attrs)
396}