Skip to main content

forge_codegen/
parser.rs

1//! Parser for Forge `.forge.html` files. Produces a flat token stream that the
2//! lowering pass converts to Askama syntax.
3//!
4//! We use a simple linear scanner rather than `chumsky` here because Forge's
5//! grammar is mostly token-by-token rewrites — a recursive parser would be
6//! overkill and harder to debug for the POC.
7
8use std::fmt;
9
/// One lexical unit of a Forge template, as produced by [`tokenize`].
///
/// All payloads are owned `String`s, so a token stream does not borrow from
/// the template text it was built from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A run of literal HTML/text that contains no Forge syntax.
    Text(String),

    /// Escaped output, written `{{ expr }}` in the template.
    EscapedExpr(String),

    /// Unescaped output, written `{!! expr !!}` in the template.
    RawExpr(String),

    /// A Blade-style directive, written `@directive` or `@directive(args)`.
    Directive {
        /// Directive identifier, without the leading `@`.
        name: String,
        /// Text between the parentheses; `None` when there is no argument list.
        args: Option<String>,
    },

    /// Opening tag of a component, written `<x-name attr="val">`.
    ComponentOpen {
        /// Component name, without the `x-` prefix.
        name: String,
        /// `(key, value)` attribute pairs in source order.
        attrs: Vec<(String, String)>,
        /// `true` for the `<x-name ... />` form.
        self_closing: bool,
    },

    /// Closing tag of a component, written `</x-name>`.
    ComponentClose {
        /// Component name, without the `x-` prefix.
        name: String,
    },
}
34
35impl fmt::Display for Token {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            Token::Text(s) => write!(f, "{s}"),
39            Token::EscapedExpr(e) => write!(f, "{{{{ {e} }}}}"),
40            Token::RawExpr(e) => write!(f, "{{!! {e} !!}}"),
41            Token::Directive {
42                name,
43                args: Some(a),
44            } => write!(f, "@{name}({a})"),
45            Token::Directive { name, args: None } => write!(f, "@{name}"),
46            Token::ComponentOpen {
47                name,
48                attrs,
49                self_closing,
50            } => {
51                write!(f, "<x-{name}")?;
52                for (k, v) in attrs {
53                    write!(f, " {k}=\"{v}\"")?;
54                }
55                if *self_closing {
56                    write!(f, " />")
57                } else {
58                    write!(f, ">")
59                }
60            }
61            Token::ComponentClose { name } => write!(f, "</x-{name}>"),
62        }
63    }
64}
65
66pub fn tokenize(input: &str) -> Vec<Token> {
67    let mut tokens = Vec::new();
68    let bytes = input.as_bytes();
69    let mut i = 0;
70    let mut text_start = 0;
71
72    while i < bytes.len() {
73        // {{ ... }}
74        if i + 1 < bytes.len()
75            && bytes[i] == b'{'
76            && bytes[i + 1] == b'{'
77            && !(i + 2 < bytes.len() && bytes[i + 2] == b'-'/* askama escape */)
78        {
79            flush_text(input, text_start, i, &mut tokens);
80            if let Some(end) = find_close(&input[i + 2..], "}}") {
81                let expr = input[i + 2..i + 2 + end].trim().to_string();
82                tokens.push(Token::EscapedExpr(expr));
83                i += 2 + end + 2;
84                text_start = i;
85                continue;
86            }
87        }
88
89        // {!! ... !!}
90        if i + 2 < bytes.len() && bytes[i] == b'{' && bytes[i + 1] == b'!' && bytes[i + 2] == b'!' {
91            flush_text(input, text_start, i, &mut tokens);
92            if let Some(end) = find_close(&input[i + 3..], "!!}") {
93                let expr = input[i + 3..i + 3 + end].trim().to_string();
94                tokens.push(Token::RawExpr(expr));
95                i += 3 + end + 3;
96                text_start = i;
97                continue;
98            }
99        }
100
101        // @directive
102        if bytes[i] == b'@'
103            && i + 1 < bytes.len()
104            && (bytes[i + 1].is_ascii_alphabetic() || bytes[i + 1] == b'_')
105        {
106            // Escape sequence: `@@` → literal `@`
107            if i + 1 < bytes.len() && bytes[i + 1] == b'@' {
108                // shouldn't reach here since we check alphabetic above
109            }
110            flush_text(input, text_start, i, &mut tokens);
111            let dir_start = i + 1;
112            let mut dir_end = dir_start;
113            while dir_end < bytes.len()
114                && (bytes[dir_end].is_ascii_alphanumeric() || bytes[dir_end] == b'_')
115            {
116                dir_end += 1;
117            }
118            let name = input[dir_start..dir_end].to_string();
119            let mut args = None;
120            let mut new_i = dir_end;
121            if dir_end < bytes.len() && bytes[dir_end] == b'(' {
122                if let Some(close_offset) = find_matching_paren(&input[dir_end..]) {
123                    args = Some(input[dir_end + 1..dir_end + close_offset].to_string());
124                    new_i = dir_end + close_offset + 1;
125                }
126            }
127            tokens.push(Token::Directive { name, args });
128            i = new_i;
129            text_start = i;
130            continue;
131        }
132
133        // <x-component ...>
134        if bytes[i] == b'<' && i + 2 < bytes.len() && bytes[i + 1] == b'x' && bytes[i + 2] == b'-' {
135            flush_text(input, text_start, i, &mut tokens);
136            let after = &input[i + 3..];
137            // Component name
138            let name_end = after
139                .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
140                .unwrap_or(after.len());
141            let name = after[..name_end].to_string();
142            let rest_start = i + 3 + name_end;
143            let close_offset = input[rest_start..]
144                .find('>')
145                .unwrap_or(input.len() - rest_start);
146            let tag_inner = &input[rest_start..rest_start + close_offset];
147            let self_closing = tag_inner.ends_with('/');
148            let attrs = parse_attrs(tag_inner.trim_end_matches('/'));
149            tokens.push(Token::ComponentOpen {
150                name,
151                attrs,
152                self_closing,
153            });
154            i = rest_start + close_offset + 1;
155            text_start = i;
156            continue;
157        }
158
159        // </x-component>
160        if bytes[i] == b'<'
161            && i + 3 < bytes.len()
162            && bytes[i + 1] == b'/'
163            && bytes[i + 2] == b'x'
164            && bytes[i + 3] == b'-'
165        {
166            flush_text(input, text_start, i, &mut tokens);
167            let after = &input[i + 4..];
168            let name_end = after.find('>').unwrap_or(after.len());
169            let name = after[..name_end].trim().to_string();
170            tokens.push(Token::ComponentClose { name });
171            i += 4 + name_end + 1;
172            text_start = i;
173            continue;
174        }
175
176        i += 1;
177    }
178
179    flush_text(input, text_start, bytes.len(), &mut tokens);
180    tokens
181}
182
183fn flush_text(input: &str, start: usize, end: usize, tokens: &mut Vec<Token>) {
184    if end > start {
185        tokens.push(Token::Text(input[start..end].to_string()));
186    }
187}
188
/// Returns the byte offset of the first occurrence of `needle` in `s`, or
/// `None` when `needle` does not occur.
fn find_close(s: &str, needle: &str) -> Option<usize> {
    s.match_indices(needle).next().map(|(offset, _)| offset)
}
192
/// Returns the byte offset of the `)` that closes the `(` at the start of `s`.
///
/// Nested parentheses are balanced. Parentheses inside single- or
/// double-quoted string literals are ignored, and a backslash inside a string
/// escapes the byte after it, so `\"` does not end the string while `\\"` does.
///
/// Returns `None` when `s` does not start with `(` or the parentheses never
/// balance.
fn find_matching_paren(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    if bytes.first() != Some(&b'(') {
        return None;
    }
    let mut depth = 1usize;
    let mut in_string: Option<u8> = None;
    // True when the previous byte inside a string was an unescaped backslash.
    // A flag is needed because looking only at the previous byte misreads an
    // escaped backslash before the closing quote (`"a\\"`) as an escaped quote.
    let mut escaped = false;
    for (i, &b) in bytes.iter().enumerate().skip(1) {
        if let Some(quote) = in_string {
            if escaped {
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == quote {
                in_string = None;
            }
            continue;
        }
        match b {
            b'"' | b'\'' => in_string = Some(b),
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
222
/// Parses the attribute section of a component tag into `(name, value)` pairs.
///
/// Supported forms, separated by whitespace:
///
/// * `name` — bare attribute, stored with an empty value,
/// * `name=value` — unquoted value, read up to the next whitespace,
/// * `name="value"` / `name='value'` — quoted value, read up to the matching
///   quote (or the end of `s` if the quote is never closed).
///
/// Whitespace on either side of `=` is skipped, so `a = "b"` yields one
/// attribute. A `=` with no name before it yields an attribute whose name is
/// the empty string. Pairs are returned in source order.
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut attrs = Vec::new();
    let mut chars = s.chars().peekable();

    loop {
        // Skip separators between attributes.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        if chars.peek().is_none() {
            break;
        }

        // Attribute name: everything up to `=` or whitespace.
        let mut name = String::new();
        while let Some(c) = chars.next_if(|c| *c != '=' && !c.is_whitespace()) {
            name.push(c);
        }

        // Look past optional whitespace for `=`. If the next thing is another
        // name instead, this was a bare attribute and the whitespace consumed
        // here was only the separator.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        if chars.peek() != Some(&'=') {
            attrs.push((name, String::new()));
            continue;
        }
        chars.next(); // consume '='
        while chars.next_if(|c| c.is_whitespace()).is_some() {}

        let mut val = String::new();
        match chars.peek().copied() {
            Some(q @ ('"' | '\'')) => {
                chars.next(); // opening quote
                for c in chars.by_ref() {
                    if c == q {
                        break;
                    }
                    val.push(c);
                }
            }
            _ => {
                while let Some(c) = chars.next_if(|c| !c.is_whitespace()) {
                    val.push(c);
                }
            }
        }
        attrs.push((name, val));
    }

    attrs
}