Skip to main content

forge_codegen/
parser.rs

1//! Parser for Forge `.forge.html` files. Produces a flat token stream that the
2//! lowering pass converts to Askama syntax.
3//!
4//! We use a simple linear scanner rather than `chumsky` here because Forge's
5//! grammar is mostly token-by-token rewrites — a recursive parser would be
6//! overkill and harder to debug for the POC.
7
8use std::fmt;
9
/// One lexical unit produced by scanning a Forge template.
///
/// The token stream is flat: component open/close tags are emitted as
/// separate tokens and are not paired up or nested here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Literal HTML/text copied verbatim from the template.
    Text(String),

    /// Escaped output, written `{{ expr }}`; holds the expression text.
    EscapedExpr(String),

    /// Unescaped output, written `{!! expr !!}`; holds the expression text.
    RawExpr(String),

    /// Blade-style directive, written `@directive` or `@directive(args)`.
    Directive {
        /// Directive name without the leading `@`.
        name: String,
        /// Text between the parentheses, or `None` when there is no argument list.
        args: Option<String>,
    },

    /// Component opening tag, written `<x-name attr="val">`.
    ComponentOpen {
        /// Component name without the `x-` prefix.
        name: String,
        /// Attribute `(name, value)` pairs in source order.
        attrs: Vec<(String, String)>,
        /// `true` when the tag was written in the self-closing `/>` form.
        self_closing: bool,
    },

    /// Component closing tag, written `</x-name>`.
    ComponentClose {
        /// Component name without the `x-` prefix.
        name: String,
    },
}
34
35impl fmt::Display for Token {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            Token::Text(s) => write!(f, "{s}"),
39            Token::EscapedExpr(e) => write!(f, "{{{{ {e} }}}}"),
40            Token::RawExpr(e) => write!(f, "{{!! {e} !!}}"),
41            Token::Directive { name, args: Some(a) } => write!(f, "@{name}({a})"),
42            Token::Directive { name, args: None } => write!(f, "@{name}"),
43            Token::ComponentOpen { name, attrs, self_closing } => {
44                write!(f, "<x-{name}")?;
45                for (k, v) in attrs {
46                    write!(f, " {k}=\"{v}\"")?;
47                }
48                if *self_closing {
49                    write!(f, " />")
50                } else {
51                    write!(f, ">")
52                }
53            }
54            Token::ComponentClose { name } => write!(f, "</x-{name}>"),
55        }
56    }
57}
58
59pub fn tokenize(input: &str) -> Vec<Token> {
60    let mut tokens = Vec::new();
61    let bytes = input.as_bytes();
62    let mut i = 0;
63    let mut text_start = 0;
64
65    while i < bytes.len() {
66        // {{ ... }}
67        if i + 1 < bytes.len() && bytes[i] == b'{' && bytes[i + 1] == b'{' && !(i + 2 < bytes.len() && bytes[i + 2] == b'-' /* askama escape */) {
68            flush_text(input, text_start, i, &mut tokens);
69            if let Some(end) = find_close(&input[i + 2..], "}}") {
70                let expr = input[i + 2..i + 2 + end].trim().to_string();
71                tokens.push(Token::EscapedExpr(expr));
72                i += 2 + end + 2;
73                text_start = i;
74                continue;
75            }
76        }
77
78        // {!! ... !!}
79        if i + 2 < bytes.len() && bytes[i] == b'{' && bytes[i + 1] == b'!' && bytes[i + 2] == b'!' {
80            flush_text(input, text_start, i, &mut tokens);
81            if let Some(end) = find_close(&input[i + 3..], "!!}") {
82                let expr = input[i + 3..i + 3 + end].trim().to_string();
83                tokens.push(Token::RawExpr(expr));
84                i += 3 + end + 3;
85                text_start = i;
86                continue;
87            }
88        }
89
90        // @directive
91        if bytes[i] == b'@' && i + 1 < bytes.len() && (bytes[i + 1].is_ascii_alphabetic() || bytes[i + 1] == b'_') {
92            // Escape sequence: `@@` → literal `@`
93            if i + 1 < bytes.len() && bytes[i + 1] == b'@' {
94                // shouldn't reach here since we check alphabetic above
95            }
96            flush_text(input, text_start, i, &mut tokens);
97            let dir_start = i + 1;
98            let mut dir_end = dir_start;
99            while dir_end < bytes.len() && (bytes[dir_end].is_ascii_alphanumeric() || bytes[dir_end] == b'_') {
100                dir_end += 1;
101            }
102            let name = input[dir_start..dir_end].to_string();
103            let mut args = None;
104            let mut new_i = dir_end;
105            if dir_end < bytes.len() && bytes[dir_end] == b'(' {
106                if let Some(close_offset) = find_matching_paren(&input[dir_end..]) {
107                    args = Some(input[dir_end + 1..dir_end + close_offset].to_string());
108                    new_i = dir_end + close_offset + 1;
109                }
110            }
111            tokens.push(Token::Directive { name, args });
112            i = new_i;
113            text_start = i;
114            continue;
115        }
116
117        // <x-component ...>
118        if bytes[i] == b'<' && i + 2 < bytes.len() && bytes[i + 1] == b'x' && bytes[i + 2] == b'-' {
119            flush_text(input, text_start, i, &mut tokens);
120            let after = &input[i + 3..];
121            // Component name
122            let name_end = after
123                .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
124                .unwrap_or(after.len());
125            let name = after[..name_end].to_string();
126            let rest_start = i + 3 + name_end;
127            let close_offset = input[rest_start..]
128                .find('>')
129                .unwrap_or(input.len() - rest_start);
130            let tag_inner = &input[rest_start..rest_start + close_offset];
131            let self_closing = tag_inner.ends_with('/');
132            let attrs = parse_attrs(tag_inner.trim_end_matches('/'));
133            tokens.push(Token::ComponentOpen {
134                name,
135                attrs,
136                self_closing,
137            });
138            i = rest_start + close_offset + 1;
139            text_start = i;
140            continue;
141        }
142
143        // </x-component>
144        if bytes[i] == b'<' && i + 3 < bytes.len() && bytes[i + 1] == b'/' && bytes[i + 2] == b'x' && bytes[i + 3] == b'-' {
145            flush_text(input, text_start, i, &mut tokens);
146            let after = &input[i + 4..];
147            let name_end = after.find('>').unwrap_or(after.len());
148            let name = after[..name_end].trim().to_string();
149            tokens.push(Token::ComponentClose { name });
150            i += 4 + name_end + 1;
151            text_start = i;
152            continue;
153        }
154
155        i += 1;
156    }
157
158    flush_text(input, text_start, bytes.len(), &mut tokens);
159    tokens
160}
161
162fn flush_text(input: &str, start: usize, end: usize, tokens: &mut Vec<Token>) {
163    if end > start {
164        tokens.push(Token::Text(input[start..end].to_string()));
165    }
166}
167
/// Returns the byte offset of the first occurrence of `needle` in `s`, or
/// `None` when `needle` does not occur.
fn find_close(s: &str, needle: &str) -> Option<usize> {
    s.match_indices(needle).next().map(|(offset, _)| offset)
}
171
/// Finds the `)` that balances the `(` at the start of `s`.
///
/// Returns the byte offset of that closing parenthesis, or `None` when `s`
/// is empty, does not begin with `(`, or the parenthesis is never closed.
///
/// Parentheses inside single- or double-quoted strings are ignored. Inside a
/// string a backslash escapes the byte that follows it, so `\"` does not end
/// the string while `\\"` does (the backslash itself is the escaped byte).
fn find_matching_paren(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    if bytes.first() != Some(&b'(') {
        return None;
    }

    let mut depth = 1usize;
    let mut in_string: Option<u8> = None;
    // Set when the previous byte inside a string was an unescaped backslash.
    let mut escaped = false;

    for (i, &b) in bytes.iter().enumerate().skip(1) {
        if let Some(quote) = in_string {
            if escaped {
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == quote {
                in_string = None;
            }
            continue;
        }
        match b {
            b'"' | b'\'' => in_string = Some(b),
            b'(' => depth += 1,
            b')' => {
                // `depth` is at least 1 here: we return as soon as it hits 0.
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
201
/// Parses the attribute portion of a component tag into `(name, value)`
/// pairs, preserving source order.
///
/// Supported forms:
///
/// * `name="value"` / `name='value'` — the value runs to the matching quote
///   (or to the end of input if the quote is never closed),
/// * `name=value` — an unquoted value runs to the next whitespace,
/// * `name` — a bare attribute gets an empty value.
///
/// Whitespace is allowed on either side of `=`, as in HTML, so
/// `name = "value"` is a single attribute rather than two malformed ones.
/// Values are returned verbatim; no entity decoding or unescaping is done.
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut attrs = Vec::new();
    let mut chars = s.chars().peekable();

    loop {
        // Skip separators between attributes.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        if chars.peek().is_none() {
            break;
        }

        // Attribute name: everything up to `=` or whitespace. It is empty
        // only when the next character is a stray `=`, which is consumed
        // below, so every iteration makes progress.
        let mut name = String::new();
        while let Some(c) = chars.next_if(|&c| c != '=' && !c.is_whitespace()) {
            name.push(c);
        }

        // Optional whitespace before `=`; without `=` this is a bare attribute.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        if chars.next_if_eq(&'=').is_none() {
            attrs.push((name, String::new()));
            continue;
        }

        // Optional whitespace after `=`.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}

        let value: String = match chars.next_if(|&c| c == '"' || c == '\'') {
            // Quoted: `take_while` also consumes the closing quote.
            Some(quote) => chars.by_ref().take_while(|&c| c != quote).collect(),
            // Unquoted: read up to (not including) the next whitespace.
            None => std::iter::from_fn(|| chars.next_if(|c| !c.is_whitespace())).collect(),
        };
        attrs.push((name, value));
    }

    attrs
}