Skip to main content

htmlm_macro/
lib.rs

1#![feature(proc_macro_diagnostic)]
2#![feature(proc_macro_def_site)]
3
4use std::fmt::Write;
5use std::str::FromStr;
6
7use proc_macro::TokenStream;
8
/// Emits a compiler diagnostic of the given `proc_macro::Level` variant.
///
/// Three call shapes are accepted, distinguished by what follows the level:
/// * `diag!(Level(span): fmt, args..)` - `span` is a `proc_macro::Span` expression.
/// * `diag!(Level[node]: fmt, args..)` - the span is taken from `node.span()`.
/// * `diag!(Level: fmt, args..)`       - the span is `Span::call_site()`.
///
/// Everything after the colon is forwarded unchanged to `format!`.
macro_rules! diag {
    // Base form: every other arm funnels into this one.
    ($lvl:ident($at:expr): $($fmt:tt)*) => {{
        proc_macro::Diagnostic::spanned($at, proc_macro::Level::$lvl, format!($($fmt)*)).emit();
    }};
    ($lvl:ident[$node:expr]: $($fmt:tt)*) => {
        diag!($lvl($node.span()): $($fmt)*)
    };
    ($lvl:ident: $($fmt:tt)*) => {
        diag!($lvl(proc_macro::Span::call_site()): $($fmt)*)
    };
}
16
/// Emits a diagnostic through `diag!` (same argument shapes) and then returns
/// `None` from the enclosing function, so it is only usable inside functions
/// whose return type is an `Option`.
macro_rules! fatal_diag {
    ($($args:tt)*) => {{
        diag!($($args)*);
        return None;
    }};
}
23
24#[proc_macro]
25pub fn html(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
26    let mut trees = input.into_iter().peekable();
27    let mut st = State {
28        final_code: "{ use core::fmt::Write; let mut ___out = String::new(); ".to_string(),
29        write_target: "___out".to_string(),
30        error_handler: ".unwrap()",
31        ..State::default()
32    };
33
34    if st.output_fmt(&mut trees).is_none() {
35        return proc_macro::TokenStream::default();
36    };
37
38    st.final_code.push_str("___out }");
39
40    if !st.tag_stack.is_empty() {
41        diag!(Error(proc_macro::Span::def_site()): "unclosed tags: {}", st.tag_stack);
42    }
43
44    proc_macro::TokenStream::from_str(&st.final_code).unwrap()
45}
46
47#[proc_macro]
48pub fn write_html(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
49    let mut trees = input.into_iter().peekable();
50    let mut st = State {
51        write_target: trees.next().expect("expected first token").to_string(),
52        error_handler: ".unwrap()",
53        ..State::default()
54    };
55
56    if st.output_fmt(&mut trees).is_none() {
57        return proc_macro::TokenStream::default();
58    };
59
60    if !st.tag_stack.is_empty() {
61        diag!(Error(proc_macro::Span::def_site()): "unclosed tags: {}", st.tag_stack);
62    }
63
64    proc_macro::TokenStream::from_str(&st.final_code).unwrap()
65}
66
/// Peekable cursor over the token trees of a `proc_macro::TokenStream`; this is
/// the input type of every parsing routine on `State`.
type Trees = std::iter::Peekable<<proc_macro::TokenStream as IntoIterator>::IntoIter>;
68
/// Mutable state threaded through template parsing and code generation.
#[derive(Default)]
struct State {
    // --- generated output ---
    /// Rust source text generated so far; parsed into a token stream at the end.
    final_code: String,
    /// Source text of the expression that generated writes are directed at.
    write_target: String,
    /// Text appended after every generated write call (e.g. `.unwrap()`).
    error_handler: &'static str,

    // --- pending / parser state ---
    /// Static markup collected since the last flush; emitted as a single
    /// `write_str` call by `flush_template`.
    template: String,
    /// Names of the currently open tags, each stored with a leading comma
    /// (`,html,body`); the innermost tag is last.
    tag_stack: String,
    /// One-shot flag set by `!`; the next interpolated expression is written
    /// without `htmlm::HtmlEscaped` and the flag is reset.
    no_escaping: bool,

    // --- scratch ---
    /// Reusable buffer that `temp_str` renders tokens into.
    tstr: String,
}
79
80impl State {
81    fn flush_template(&mut self) {
82        if self.template.is_empty() {
83            return;
84        }
85        write!(
86            self.final_code,
87            "{}.write_str({:?}){};",
88            self.write_target, self.template, self.error_handler
89        )
90        .unwrap();
91        self.template.clear();
92    }
93
94    fn display_expr(&mut self, expr: impl std::fmt::Display) {
95        self.flush_template();
96        if std::mem::take(&mut self.no_escaping) {
97            write!(
98                self.final_code,
99                "write!({}, \"{{}}\", {expr}){};",
100                self.write_target, self.error_handler
101            )
102            .unwrap();
103        } else {
104            write!(
105                self.final_code,
106                "write!({}, \"{{}}\", &htmlm::HtmlEscaped(&{expr})){};",
107                self.write_target, self.error_handler,
108            )
109            .unwrap()
110        }
111    }
112
113    fn expect_group(
114        &mut self,
115        trees: &mut Trees,
116        delim: proc_macro::Delimiter,
117    ) -> Option<proc_macro::TokenStream> {
118        match trees.next() {
119            Some(proc_macro::TokenTree::Group(g)) if g.delimiter() == delim => Some(g.stream()),
120            Some(c) => fatal_diag!(Error[c]: "expected {delim:?} group"),
121            None => fatal_diag!(Error: "expected {delim:?} group, got eof"),
122        }
123    }
124
125    fn expect_punct(&mut self, trees: &mut Trees, c: char) -> Option<()> {
126        match trees.next() {
127            Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == c => Some(()),
128            Some(c) => fatal_diag!(Error[c]: "expected {c}"),
129            None => fatal_diag!(Error: "expected {c}, got eof"),
130        }
131    }
132
133    fn tag(&mut self, p: proc_macro::Punct, trees: &mut Trees) -> Option<()> {
134        let temp = match trees.next() {
135            Some(proc_macro::TokenTree::Ident(id)) => {
136                let temp = temp_str(&id, &mut self.tstr);
137                if !is_html_tag(temp) {
138                    diag!(Error[p]: "expected html5 tag name");
139                }
140                temp
141            }
142            Some(proc_macro::TokenTree::Literal(lit)) => {
143                let temp = temp_str(&lit, &mut self.tstr).trim_matches('"');
144                if is_html_tag(temp) {
145                    if !temp.contains('!') {
146                        diag!(Warning[p]: "unnescessary string escaping");
147                    }
148                } else if !is_valid_webcomponent(temp) {
149                    diag!(Error[p]: "invalid web component identifier");
150                }
151                temp
152            }
153            Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '/' => {
154                let Some((_, top)) = self.tag_stack.rsplit_once(',') else {
155                    fatal_diag!(Error[p]: "no tag to close");
156                };
157
158                let new_tag_stack_len = self.tag_stack.len() - top.len() - 1;
159
160                let (temp, span) = match trees.next() {
161                    Some(proc_macro::TokenTree::Ident(id)) => {
162                        (temp_str(&id, &mut self.tstr), id.span())
163                    }
164                    Some(proc_macro::TokenTree::Literal(lit)) => {
165                        (temp_str(&lit, &mut self.tstr).trim_matches('"'), lit.span())
166                    }
167                    Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '>' => {
168                        // easter egg
169                        write!(&mut self.template, "</{top}>").unwrap();
170                        self.tag_stack.truncate(new_tag_stack_len);
171                        return Some(());
172                    }
173                    Some(c) => fatal_diag!(Error[c]: "unexpected token in closing tag"),
174                    None => {
175                        fatal_diag!(Error[p]: "expected tag ident or string or '>'")
176                    }
177                };
178
179                if temp != top {
180                    diag!(Error(span): "expected closing '{top}' tag");
181                }
182
183                write!(&mut self.template, "</{top}>").unwrap();
184                self.expect_punct(trees, '>')?;
185                self.tag_stack.truncate(new_tag_stack_len);
186                return Some(());
187            }
188            _ => fatal_diag!(Error[p]: "expected tag ident or string literal"),
189        };
190
191        write!(&mut self.template, "<{temp}").unwrap();
192        if !is_self_closing(temp) {
193            write!(self.tag_stack, ",{temp}").unwrap();
194        }
195
196        let mut has_attr = false;
197        while let Some(c) = trees.next() {
198            let mut has_attr_tmp = false;
199            match c {
200                proc_macro::TokenTree::Punct(p) if p.as_char() == '=' && has_attr => loop {
201                    match trees.next() {
202                        Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '!' => {
203                            self.no_escaping = true;
204                            continue;
205                        }
206                        Some(proc_macro::TokenTree::Literal(lit)) => {
207                            write!(&mut self.template, "={lit}").unwrap();
208                        }
209                        Some(proc_macro::TokenTree::Ident(id)) => {
210                            write!(&mut self.template, "=\"").unwrap();
211                            self.display_expr(id);
212                            write!(&mut self.template, "\"").unwrap();
213                        }
214                        Some(proc_macro::TokenTree::Group(g))
215                            if g.delimiter() == proc_macro::Delimiter::Brace =>
216                        {
217                            write!(&mut self.template, "=\"").unwrap();
218                            self.display_expr(g.stream());
219                            write!(&mut self.template, "\"").unwrap();
220                        }
221                        Some(c) => {
222                            diag!(Error[c]: "unexpected token in attr value")
223                        }
224                        None => diag!(Error[p]: "expected attribute value"),
225                    }
226                    break;
227                },
228                proc_macro::TokenTree::Punct(p) if p.as_char() == '>' => {
229                    write!(&mut self.template, ">").unwrap();
230                    break;
231                }
232                proc_macro::TokenTree::Ident(id) => {
233                    write!(&mut self.template, " {id}").unwrap();
234                    has_attr_tmp = true;
235                }
236                proc_macro::TokenTree::Literal(lit) => {
237                    let temp = temp_str(&lit, &mut self.tstr).trim_matches('"');
238                    if !is_valid_html_attt_name(temp) {
239                        diag!(Error[p]: "invalid attribute name");
240                    }
241                    write!(&mut self.template, " {temp}").unwrap();
242                    has_attr_tmp = true;
243                }
244                c => diag!(Error[c]: "unexpected token in attribute list"),
245            }
246            has_attr = has_attr_tmp;
247        }
248
249        Some(())
250    }
251
252    fn matches_char(t: &proc_macro::TokenTree, ch: char, spacing: proc_macro::Spacing) -> bool {
253        matches!(t, proc_macro::TokenTree::Punct(p) if p.as_char() == ch && p.spacing() == spacing)
254    }
255
256    fn expr_then_braces(&mut self, trees: &mut Trees) -> Option<(TokenStream, TokenStream)> {
257        let mut body = None;
258        let iter = trees
259            .by_ref()
260            .map_while(|t| {
261                if let proc_macro::TokenTree::Group(g) = &t
262                    && g.delimiter() == proc_macro::Delimiter::Brace
263                {
264                    body = Some(g.stream());
265                    None
266                } else {
267                    Some(t)
268                }
269            })
270            .collect::<proc_macro::TokenStream>();
271        let Some(body) = body else {
272            fatal_diag!(Error: "expected '{{' group, got eof");
273        };
274        Some((iter, body))
275    }
276
277    fn if_expr(&mut self, trees: &mut Trees) -> Option<()> {
278        let (cond, body) = self.expr_then_braces(trees)?;
279
280        let else_body = match trees.peek() {
281            Some(proc_macro::TokenTree::Ident(id)) if temp_str(id, &mut self.tstr) == "else" => {
282                trees.next();
283                self.expect_group(trees, proc_macro::Delimiter::Brace)?
284            }
285            _ => proc_macro::TokenStream::new(),
286        };
287
288        self.tstr.clear();
289
290        self.flush_template();
291
292        write!(&mut self.final_code, "if {cond} {{").unwrap();
293        self.output_fmt(&mut body.into_iter().peekable())?;
294        write!(&mut self.final_code, "}}").unwrap();
295        if !else_body.is_empty() {
296            write!(&mut self.final_code, " else {{").unwrap();
297            self.output_fmt(&mut else_body.into_iter().peekable())?;
298            write!(&mut self.final_code, "}}").unwrap();
299        }
300        Some(())
301    }
302
303    fn match_expr(&mut self, trees: &mut Trees) -> Option<()> {
304        let (expr, body) = self.expr_then_braces(trees)?;
305        self.flush_template();
306
307        write!(&mut self.final_code, "match {expr} {{").unwrap();
308
309        let mut body = body.into_iter().peekable();
310        loop {
311            let mut pattern = proc_macro::TokenStream::new();
312            let mut looped = false;
313
314            while let Some(c) = body.next() {
315                if !Self::matches_char(&c, '=', proc_macro::Spacing::Joint) {
316                    pattern.extend([c]);
317                    looped = true;
318                    continue;
319                }
320
321                let nc = body.next().expect("haaaaa");
322                if Self::matches_char(&nc, '>', proc_macro::Spacing::Alone) {
323                    break;
324                }
325                pattern.extend([c, nc]);
326                looped = true;
327            }
328
329            if !looped {
330                break;
331            }
332
333            let body = self.expect_group(&mut body, proc_macro::Delimiter::Brace)?;
334
335            write!(&mut self.final_code, "{pattern} => {{").unwrap();
336            self.output_fmt(&mut body.into_iter().peekable())?;
337            write!(&mut self.final_code, "}}").unwrap();
338        }
339
340        write!(&mut self.final_code, "}}").unwrap();
341
342        Some(())
343    }
344
345    fn for_expr(&mut self, trees: &mut Trees) -> Option<()> {
346        let mut ink = proc_macro::Span::call_site();
347        let loop_var = trees
348            .by_ref()
349            .take_while(|t| {
350                ink = t.span();
351                !matches!(t, proc_macro::TokenTree::Ident(id) if temp_str(id, &mut self.tstr) == "in")
352            })
353            .collect::<proc_macro::TokenStream>();
354
355        let (iter, body) = self.expr_then_braces(trees)?;
356
357        let else_body = match trees.peek() {
358            Some(proc_macro::TokenTree::Ident(id)) if temp_str(id, &mut self.tstr) == "else" => {
359                trees.next();
360                self.expect_group(trees, proc_macro::Delimiter::Brace)?
361            }
362            _ => proc_macro::TokenStream::new(),
363        };
364
365        self.tstr.clear();
366
367        self.flush_template();
368
369        if else_body.is_empty() {
370            write!(&mut self.final_code, "for {loop_var} in {iter} {{").unwrap();
371        } else {
372            write!(
373                &mut self.final_code,
374                "let mut looped = false;\
375            for {loop_var} in {iter} {{\
376            looped = true;"
377            )
378            .unwrap();
379        }
380        self.output_fmt(&mut body.into_iter().peekable())?;
381        write!(&mut self.final_code, "}}").unwrap();
382        if !else_body.is_empty() {
383            write!(&mut self.final_code, "if !looped {{").unwrap();
384            self.output_fmt(&mut else_body.into_iter().peekable())?;
385            write!(&mut self.final_code, "}}").unwrap();
386        }
387        Some(())
388    }
389
390    fn nested(&mut self, trees: &mut Trees) -> Option<()> {
391        let ident = match trees.next() {
392            Some(proc_macro::TokenTree::Ident(ident)) => ident,
393            Some(g) => fatal_diag!(Error[g]: "expected identifier"),
394            None => fatal_diag!(Error: "expected identifier, got eof"),
395        };
396
397        let closing_pipe = trees.next();
398        if !matches!(&closing_pipe, Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '|') {
399            match closing_pipe {
400                Some(closing_pipe) => fatal_diag!(Error[closing_pipe]: "expected '|'"),
401                None => fatal_diag!(Error: "expected '|', got eof"),
402            }
403        }
404
405        self.flush_template();
406
407        write!(
408            &mut self.final_code,
409            "{{ let {ident} = &mut {};",
410            self.write_target
411        )
412        .unwrap();
413
414        let stream = self.expect_group(trees, proc_macro::Delimiter::Brace)?;
415        write!(&mut self.final_code, "{stream};}}").unwrap();
416
417        Some(())
418    }
419
420    fn output_fmt(&mut self, trees: &mut Trees) -> Option<()> {
421        while let Some(c) = trees.next() {
422            match c {
423                proc_macro::TokenTree::Punct(p) if p.as_char() == '!' => self.no_escaping = true,
424                proc_macro::TokenTree::Punct(p) if p.as_char() == '<' => self.tag(p, trees)?,
425                proc_macro::TokenTree::Punct(p) if p.as_char() == '|' => self.nested(trees)?,
426                proc_macro::TokenTree::Literal(lit) => self
427                    .template
428                    .push_str(temp_str(&lit, &mut self.tstr).trim_matches('"')),
429                proc_macro::TokenTree::Ident(id) => match temp_str(&id, &mut self.tstr) {
430                    "if" => self.if_expr(trees)?,
431                    "match" => self.match_expr(trees)?,
432                    "for" => self.for_expr(trees)?,
433                    _ => self.display_expr(id),
434                },
435                proc_macro::TokenTree::Group(g)
436                    if g.delimiter() == proc_macro::Delimiter::Brace =>
437                {
438                    self.display_expr(g.stream());
439                }
440                c => fatal_diag!(Error[c]: "unexpected token"),
441            }
442        }
443        self.flush_template();
444
445        Some(())
446    }
447}
448
/// Renders `i` with `Display` into `buf`, replacing the previous contents, and
/// returns the result as a string slice borrowed from `buf`.
fn temp_str(i: impl std::fmt::Display, buf: &mut String) -> &str {
    buf.clear();
    buf.write_fmt(format_args!("{i}")).unwrap();
    buf.as_str()
}
454
/// Checks an attribute name that was written as a string literal.
///
/// A name is accepted when it is non-empty and consists only of ASCII letters,
/// ASCII digits, `_`, `-` and non-ASCII bytes. The empty string is rejected
/// (an "all bytes" check alone would accept it), and digits are allowed so
/// that names such as `data-x1` pass.
fn is_valid_html_attt_name(tag: &str) -> bool {
    !tag.is_empty()
        && tag.bytes().all(|c| {
            matches!(
                c,
                b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' | 128..=u8::MAX
            )
        })
}
459
/// Checks a tag name that was written as a string literal and is not a known
/// HTML tag.
///
/// A name is accepted when it contains at least one `-` and consists only of
/// lowercase ASCII letters, ASCII digits, `_`, `-`, `.` and non-ASCII bytes.
/// Digits are allowed so that names such as `my-element2` pass.
fn is_valid_webcomponent(tag: &str) -> bool {
    tag.contains('-')
        && tag.bytes().all(|c| {
            matches!(
                c,
                b'a'..=b'z' | b'0'..=b'9' | b'_' | b'-' | b'.' | 128..=u8::MAX
            )
        })
}
467
/// Returns `true` when `tag` is one of the tag names this macro accepts without
/// treating it as a web component. The comparison is exact (case-sensitive).
///
/// Besides `!DOCTYPE` the list holds standard HTML element names, including a
/// few obsolete ones. `hgroup`, `math`, `menu`, `search` and `slot` are part of
/// the list: they contain no dash, so they could not be written as a
/// web-component literal either.
fn is_html_tag(tag: &str) -> bool {
    matches!(
        tag,
        "!DOCTYPE"
            | "a"
            | "abbr"
            | "acronym"
            | "address"
            | "area"
            | "article"
            | "aside"
            | "audio"
            | "b"
            | "base"
            | "basefont"
            | "bdi"
            | "bdo"
            | "big"
            | "blockquote"
            | "body"
            | "br"
            | "button"
            | "canvas"
            | "caption"
            | "center"
            | "cite"
            | "code"
            | "col"
            | "colgroup"
            | "data"
            | "datalist"
            | "dd"
            | "del"
            | "details"
            | "dfn"
            | "dialog"
            | "div"
            | "dl"
            | "dt"
            | "em"
            | "embed"
            | "fieldset"
            | "figcaption"
            | "figure"
            | "footer"
            | "form"
            | "h1"
            | "h2"
            | "h3"
            | "h4"
            | "h5"
            | "h6"
            | "head"
            | "header"
            | "hgroup"
            | "hr"
            | "html"
            | "i"
            | "iframe"
            | "img"
            | "input"
            | "ins"
            | "kbd"
            | "label"
            | "legend"
            | "li"
            | "link"
            | "main"
            | "map"
            | "mark"
            | "math"
            | "menu"
            | "meta"
            | "meter"
            | "nav"
            | "noscript"
            | "object"
            | "ol"
            | "optgroup"
            | "option"
            | "output"
            | "p"
            | "param"
            | "picture"
            | "pre"
            | "progress"
            | "q"
            | "rp"
            | "rt"
            | "ruby"
            | "s"
            | "samp"
            | "script"
            | "search"
            | "section"
            | "select"
            | "slot"
            | "small"
            | "source"
            | "span"
            | "strong"
            | "style"
            | "sub"
            | "summary"
            | "sup"
            | "svg"
            | "table"
            | "tbody"
            | "td"
            | "template"
            | "textarea"
            | "tfoot"
            | "th"
            | "thead"
            | "time"
            | "title"
            | "tr"
            | "track"
            | "u"
            | "ul"
            | "var"
            | "video"
            | "wbr"
    )
}
588
/// Returns `true` for tag names that never get a closing tag and are therefore
/// not pushed onto the open-tag stack. The comparison is exact (case-sensitive).
fn is_self_closing(tag: &str) -> bool {
    const SELF_CLOSING: [&str; 15] = [
        "!DOCTYPE", "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta",
        "param", "source", "track", "wbr",
    ];

    SELF_CLOSING.iter().any(|name| *name == tag)
}