htmlm_macro/
lib.rs

1#![feature(proc_macro_diagnostic)]
2#![feature(let_chains)]
3#![feature(proc_macro_def_site)]
4
5use std::fmt::Write;
6use std::str::FromStr;
7
8use proc_macro::TokenStream;
9
/// Emits a compiler diagnostic through `proc_macro::Diagnostic`.
///
/// `Level` below is a `proc_macro::Level` variant name (e.g. `Error`, `Warning`);
/// everything after the colon is forwarded to `format!`.
///
/// * `diag!(Level: "fmt", args..)` reports at the macro call site.
/// * `diag!(Level[item]: "fmt", args..)` reports at `item.span()`.
/// * `diag!(Level(span): "fmt", args..)` reports at the given span.
macro_rules! diag {
    ($lvl:ident: $($fmt:tt)*) => {diag!($lvl(proc_macro::Span::call_site()): $($fmt)*)};
    ($lvl:ident[$spanned:expr]: $($fmt:tt)*) => {diag!($lvl($spanned.span()): $($fmt)*)};
    ($lvl:ident($at:expr): $($fmt:tt)*) => {{
        proc_macro::Diagnostic::spanned($at, proc_macro::Level::$lvl, format!($($fmt)*)).emit();
    }};
}
17
/// Emits a diagnostic via `diag!` (same argument forms) and then returns `None`
/// from the enclosing function.
macro_rules! fatal_diag {
    ($($args:tt)*) => {{
        diag!($($args)*);
        return None;
    }};
}
24
25#[proc_macro]
26pub fn html(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
27    let mut trees = input.into_iter().peekable();
28    let mut st = State {
29        final_code: "{ use core::fmt::Write; let mut ___out = String::new(); ".to_string(),
30        write_target: "___out".to_string(),
31        error_handler: ".unwrap()",
32        ..State::default()
33    };
34
35    if st.output_fmt(&mut trees).is_none() {
36        return proc_macro::TokenStream::default();
37    };
38
39    st.final_code.push_str("___out }");
40
41    if !st.tag_stack.is_empty() {
42        diag!(Error(proc_macro::Span::def_site()): "unclosed tags: {}", st.tag_stack);
43    }
44
45    proc_macro::TokenStream::from_str(&st.final_code).unwrap()
46}
47
48#[proc_macro]
49pub fn write_html(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
50    let mut trees = input.into_iter().peekable();
51    let mut st = State {
52        write_target: trees.next().expect("expected first token").to_string(),
53        error_handler: ".unwrap()",
54        ..State::default()
55    };
56
57    if st.output_fmt(&mut trees).is_none() {
58        return proc_macro::TokenStream::default();
59    };
60
61    if !st.tag_stack.is_empty() {
62        diag!(Error(proc_macro::Span::def_site()): "unclosed tags: {}", st.tag_stack);
63    }
64
65    proc_macro::TokenStream::from_str(&st.final_code).unwrap()
66}
67
/// Peekable iterator over the token trees of a macro input or of a group's contents.
type Trees = std::iter::Peekable<proc_macro::token_stream::IntoIter>;
69
/// Parser and code-generator state shared by all template constructs.
#[derive(Default)]
struct State {
    /// Generated Rust source accumulated so far; parsed into a token stream at the end.
    final_code: String,
    /// Static markup collected since the last flush; emitted as one `write_str` call.
    template: String,
    /// Scratch buffer reused by `temp_str` to stringify tokens.
    tstr: String,
    /// Names of currently open tags, each prefixed with `,` (e.g. `,div,span`).
    tag_stack: String,
    /// Source text of the expression the generated code writes into.
    write_target: String,
    /// Set by a `!` token; consumed (reset to `false`) by the next `display_expr` call.
    no_escaping: bool,
    /// Source text appended after every generated write call (e.g. `.unwrap()`).
    error_handler: &'static str,
}
80
81impl State {
82    fn flush_template(&mut self) {
83        if self.template.is_empty() {
84            return;
85        }
86        write!(
87            self.final_code,
88            "{}.write_str({:?}){};",
89            self.write_target, self.template, self.error_handler
90        )
91        .unwrap();
92        self.template.clear();
93    }
94
95    fn display_expr(&mut self, expr: impl std::fmt::Display) {
96        self.flush_template();
97        if std::mem::take(&mut self.no_escaping) {
98            write!(
99                self.final_code,
100                "write!({}, \"{{}}\", {expr}){};",
101                self.write_target, self.error_handler
102            )
103            .unwrap();
104        } else {
105            write!(
106                self.final_code,
107                "write!({}, \"{{}}\", &htmlm::HtmlEscaped(&{expr})){};",
108                self.write_target, self.error_handler,
109            )
110            .unwrap()
111        }
112    }
113
114    fn expect_group(
115        &mut self,
116        trees: &mut Trees,
117        delim: proc_macro::Delimiter,
118    ) -> Option<proc_macro::TokenStream> {
119        match trees.next() {
120            Some(proc_macro::TokenTree::Group(g)) if g.delimiter() == delim => Some(g.stream()),
121            Some(c) => fatal_diag!(Error[c]: "expected {delim:?} group"),
122            None => fatal_diag!(Error: "expected {delim:?} group, got eof"),
123        }
124    }
125
126    fn expect_punct(&mut self, trees: &mut Trees, c: char) -> Option<()> {
127        match trees.next() {
128            Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == c => Some(()),
129            Some(c) => fatal_diag!(Error[c]: "expected {c}"),
130            None => fatal_diag!(Error: "expected {c}, got eof"),
131        }
132    }
133
134    fn tag(&mut self, p: proc_macro::Punct, trees: &mut Trees) -> Option<()> {
135        let temp = match trees.next() {
136            Some(proc_macro::TokenTree::Ident(id)) => {
137                let temp = temp_str(&id, &mut self.tstr);
138                if !is_html_tag(temp) {
139                    diag!(Error[p]: "expected html5 tag name");
140                }
141                temp
142            }
143            Some(proc_macro::TokenTree::Literal(lit)) => {
144                let temp = temp_str(&lit, &mut self.tstr).trim_matches('"');
145                if is_html_tag(temp) {
146                    if !temp.contains('!') {
147                        diag!(Warning[p]: "unnescessary string escaping");
148                    }
149                } else if !is_valid_webcomponent(temp) {
150                    diag!(Error[p]: "invalid web component identifier");
151                }
152                temp
153            }
154            Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '/' => {
155                let Some((_, top)) = self.tag_stack.rsplit_once(',') else {
156                    fatal_diag!(Error[p]: "no tag to close");
157                };
158
159                let new_tag_stack_len = self.tag_stack.len() - top.len() - 1;
160
161                let (temp, span) = match trees.next() {
162                    Some(proc_macro::TokenTree::Ident(id)) => {
163                        (temp_str(&id, &mut self.tstr), id.span())
164                    }
165                    Some(proc_macro::TokenTree::Literal(lit)) => {
166                        (temp_str(&lit, &mut self.tstr).trim_matches('"'), lit.span())
167                    }
168                    Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '>' => {
169                        // easter egg
170                        write!(&mut self.template, "</{top}>").unwrap();
171                        self.tag_stack.truncate(new_tag_stack_len);
172                        return Some(());
173                    }
174                    Some(c) => fatal_diag!(Error[c]: "unexpected token in closing tag"),
175                    None => {
176                        fatal_diag!(Error[p]: "expected tag ident or string or '>'")
177                    }
178                };
179
180                if temp != top {
181                    diag!(Error(span): "expected closing '{top}' tag");
182                }
183
184                write!(&mut self.template, "</{top}>").unwrap();
185                self.expect_punct(trees, '>')?;
186                self.tag_stack.truncate(new_tag_stack_len);
187                return Some(());
188            }
189            _ => fatal_diag!(Error[p]: "expected tag ident or string literal"),
190        };
191
192        write!(&mut self.template, "<{temp}").unwrap();
193        if !is_self_closing(temp) {
194            write!(self.tag_stack, ",{temp}").unwrap();
195        }
196
197        let mut has_attr = false;
198        while let Some(c) = trees.next() {
199            let mut has_attr_tmp = false;
200            match c {
201                proc_macro::TokenTree::Punct(p) if p.as_char() == '=' && has_attr => loop {
202                    match trees.next() {
203                        Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '!' => {
204                            self.no_escaping = true;
205                            continue;
206                        }
207                        Some(proc_macro::TokenTree::Literal(lit)) => {
208                            write!(&mut self.template, "={lit}").unwrap();
209                        }
210                        Some(proc_macro::TokenTree::Ident(id)) => {
211                            write!(&mut self.template, "=\"").unwrap();
212                            self.display_expr(id);
213                            write!(&mut self.template, "\"").unwrap();
214                        }
215                        Some(proc_macro::TokenTree::Group(g))
216                            if g.delimiter() == proc_macro::Delimiter::Brace =>
217                        {
218                            write!(&mut self.template, "=\"").unwrap();
219                            self.display_expr(g.stream());
220                            write!(&mut self.template, "\"").unwrap();
221                        }
222                        Some(c) => {
223                            diag!(Error[c]: "unexpected token in attr value")
224                        }
225                        None => diag!(Error[p]: "expected attribute value"),
226                    }
227                    break;
228                },
229                proc_macro::TokenTree::Punct(p) if p.as_char() == '>' => {
230                    write!(&mut self.template, ">").unwrap();
231                    break;
232                }
233                proc_macro::TokenTree::Ident(id) => {
234                    write!(&mut self.template, " {id}").unwrap();
235                    has_attr_tmp = true;
236                }
237                proc_macro::TokenTree::Literal(lit) => {
238                    let temp = temp_str(&lit, &mut self.tstr).trim_matches('"');
239                    if !is_valid_html_attt_name(temp) {
240                        diag!(Error[p]: "invalid attribute name");
241                    }
242                    write!(&mut self.template, " {temp}").unwrap();
243                    has_attr_tmp = true;
244                }
245                c => diag!(Error[c]: "unexpected token in attribute list"),
246            }
247            has_attr = has_attr_tmp;
248        }
249
250        Some(())
251    }
252
253    fn matches_char(t: &proc_macro::TokenTree, ch: char, spacing: proc_macro::Spacing) -> bool {
254        matches!(t, proc_macro::TokenTree::Punct(p) if p.as_char() == ch && p.spacing() == spacing)
255    }
256
257    fn expr_then_braces(&mut self, trees: &mut Trees) -> Option<(TokenStream, TokenStream)> {
258        let mut body = None;
259        let iter = trees
260            .by_ref()
261            .map_while(|t| {
262                if let proc_macro::TokenTree::Group(g) = &t
263                    && g.delimiter() == proc_macro::Delimiter::Brace
264                {
265                    body = Some(g.stream());
266                    None
267                } else {
268                    Some(t)
269                }
270            })
271            .collect::<proc_macro::TokenStream>();
272        let Some(body) = body else {
273            fatal_diag!(Error: "expected '{{' group, got eof");
274        };
275        Some((iter, body))
276    }
277
278    fn if_expr(&mut self, trees: &mut Trees) -> Option<()> {
279        let (cond, body) = self.expr_then_braces(trees)?;
280
281        let else_body = match trees.peek() {
282            Some(proc_macro::TokenTree::Ident(id)) if temp_str(id, &mut self.tstr) == "else" => {
283                trees.next();
284                self.expect_group(trees, proc_macro::Delimiter::Brace)?
285            }
286            _ => proc_macro::TokenStream::new(),
287        };
288
289        self.tstr.clear();
290
291        self.flush_template();
292
293        write!(&mut self.final_code, "if {cond} {{").unwrap();
294        self.output_fmt(&mut body.into_iter().peekable())?;
295        write!(&mut self.final_code, "}}").unwrap();
296        if !else_body.is_empty() {
297            write!(&mut self.final_code, " else {{").unwrap();
298            self.output_fmt(&mut else_body.into_iter().peekable())?;
299            write!(&mut self.final_code, "}}").unwrap();
300        }
301        Some(())
302    }
303
304    fn match_expr(&mut self, trees: &mut Trees) -> Option<()> {
305        let (expr, body) = self.expr_then_braces(trees)?;
306        self.flush_template();
307
308        write!(&mut self.final_code, "match {expr} {{").unwrap();
309
310        let mut body = body.into_iter().peekable();
311        loop {
312            let mut pattern = proc_macro::TokenStream::new();
313            let mut looped = false;
314
315            while let Some(c) = body.next() {
316                if !Self::matches_char(&c, '=', proc_macro::Spacing::Joint) {
317                    pattern.extend([c]);
318                    looped = true;
319                    continue;
320                }
321
322                let nc = body.next().expect("haaaaa");
323                if Self::matches_char(&nc, '>', proc_macro::Spacing::Alone) {
324                    break;
325                }
326                pattern.extend([c, nc]);
327                looped = true;
328            }
329
330            if !looped {
331                break;
332            }
333
334            let body = self.expect_group(&mut body, proc_macro::Delimiter::Brace)?;
335
336            write!(&mut self.final_code, "{pattern} => {{").unwrap();
337            self.output_fmt(&mut body.into_iter().peekable())?;
338            write!(&mut self.final_code, "}}").unwrap();
339        }
340
341        write!(&mut self.final_code, "}}").unwrap();
342
343        Some(())
344    }
345
346    fn for_expr(&mut self, trees: &mut Trees) -> Option<()> {
347        let mut ink = proc_macro::Span::call_site();
348        let loop_var = trees
349            .by_ref()
350            .take_while(|t| {
351                ink = t.span();
352                !matches!(t, proc_macro::TokenTree::Ident(id) if temp_str(id, &mut self.tstr) == "in")
353            })
354            .collect::<proc_macro::TokenStream>();
355
356        let (iter, body) = self.expr_then_braces(trees)?;
357
358        let else_body = match trees.peek() {
359            Some(proc_macro::TokenTree::Ident(id)) if temp_str(id, &mut self.tstr) == "else" => {
360                trees.next();
361                self.expect_group(trees, proc_macro::Delimiter::Brace)?
362            }
363            _ => proc_macro::TokenStream::new(),
364        };
365
366        self.tstr.clear();
367
368        self.flush_template();
369
370        if else_body.is_empty() {
371            write!(&mut self.final_code, "for {loop_var} in {iter} {{").unwrap();
372        } else {
373            write!(
374                &mut self.final_code,
375                "let mut looped = false;\
376            for {loop_var} in {iter} {{\
377            looped = true;"
378            )
379            .unwrap();
380        }
381        self.output_fmt(&mut body.into_iter().peekable())?;
382        write!(&mut self.final_code, "}}").unwrap();
383        if !else_body.is_empty() {
384            write!(&mut self.final_code, "if !looped {{").unwrap();
385            self.output_fmt(&mut else_body.into_iter().peekable())?;
386            write!(&mut self.final_code, "}}").unwrap();
387        }
388        Some(())
389    }
390
391    fn nested(&mut self, trees: &mut Trees) -> Option<()> {
392        let ident = match trees.next() {
393            Some(proc_macro::TokenTree::Ident(ident)) => ident,
394            Some(g) => fatal_diag!(Error[g]: "expected identifier"),
395            None => fatal_diag!(Error: "expected identifier, got eof"),
396        };
397
398        let closing_pipe = trees.next();
399        if !matches!(&closing_pipe, Some(proc_macro::TokenTree::Punct(p)) if p.as_char() == '|') {
400            match closing_pipe {
401                Some(closing_pipe) => fatal_diag!(Error[closing_pipe]: "expected '|'"),
402                None => fatal_diag!(Error: "expected '|', got eof"),
403            }
404        }
405
406        self.flush_template();
407
408        write!(
409            &mut self.final_code,
410            "{{ let {ident} = &mut {};",
411            self.write_target
412        )
413        .unwrap();
414
415        let stream = self.expect_group(trees, proc_macro::Delimiter::Brace)?;
416        write!(&mut self.final_code, "{stream};}}").unwrap();
417
418        Some(())
419    }
420
421    fn output_fmt(&mut self, trees: &mut Trees) -> Option<()> {
422        while let Some(c) = trees.next() {
423            match c {
424                proc_macro::TokenTree::Punct(p) if p.as_char() == '!' => self.no_escaping = true,
425                proc_macro::TokenTree::Punct(p) if p.as_char() == '<' => self.tag(p, trees)?,
426                proc_macro::TokenTree::Punct(p) if p.as_char() == '|' => self.nested(trees)?,
427                proc_macro::TokenTree::Literal(lit) => self
428                    .template
429                    .push_str(temp_str(&lit, &mut self.tstr).trim_matches('"')),
430                proc_macro::TokenTree::Ident(id) => match temp_str(&id, &mut self.tstr) {
431                    "if" => self.if_expr(trees)?,
432                    "match" => self.match_expr(trees)?,
433                    "for" => self.for_expr(trees)?,
434                    _ => self.display_expr(id),
435                },
436                proc_macro::TokenTree::Group(g)
437                    if g.delimiter() == proc_macro::Delimiter::Brace =>
438                {
439                    self.display_expr(g.stream());
440                }
441                c => fatal_diag!(Error[c]: "unexpected token"),
442            }
443        }
444        self.flush_template();
445
446        Some(())
447    }
448}
449
/// Formats `i` into `buf`, replacing its previous contents, and returns the
/// result as a string slice borrowed from `buf`.
fn temp_str(i: impl std::fmt::Display, buf: &mut String) -> &str {
    buf.clear();
    buf.write_fmt(format_args!("{i}")).unwrap();
    buf.as_str()
}
455
/// Returns `true` when `tag` is a syntactically valid HTML attribute name.
///
/// Follows the HTML attribute-name syntax: one or more characters, none of
/// which is a control character, a space, `"`, `'`, `>`, `/`, `=`, or a
/// Unicode noncharacter. This admits names with digits and punctuation such as
/// `data-1` or `x-on:click`, and rejects the empty string.
fn is_valid_html_attt_name(tag: &str) -> bool {
    !tag.is_empty()
        && tag.chars().all(|c| {
            let cp = c as u32;
            // Noncharacters: U+FDD0..=U+FDEF and the last two code points of every plane.
            let noncharacter = (0xFDD0..=0xFDEF).contains(&cp) || (cp & 0xFFFE) == 0xFFFE;
            !(c.is_control() || noncharacter || matches!(c, ' ' | '"' | '\'' | '>' | '/' | '='))
        })
}
460
/// Returns `true` when `tag` looks like a valid custom element (web component) name.
///
/// The name must start with a lowercase ASCII letter, contain at least one
/// `-`, and consist only of lowercase ASCII letters, ASCII digits, `_`, `-`,
/// `.` and non-ASCII bytes.
fn is_valid_webcomponent(tag: &str) -> bool {
    let starts_with_lower = matches!(tag.bytes().next(), Some(b'a'..=b'z'));
    let has_dash = tag.bytes().any(|c| c == b'-');
    let only_allowed = tag
        .bytes()
        .all(|c| matches!(c, b'a'..=b'z' | b'0'..=b'9' | b'_' | b'-' | b'.' | 128..=u8::MAX));

    starts_with_lower && has_dash && only_allowed
}
468
/// Returns `true` when `tag` is one of the element names this macro accepts
/// without quoting as a web component.
///
/// The list covers the HTML elements (including a few obsolete ones kept for
/// compatibility), the `svg` and `math` embedding roots, and the pseudo-tag
/// `!DOCTYPE`. Matching is case-sensitive.
fn is_html_tag(tag: &str) -> bool {
    matches!(
        tag,
        "!DOCTYPE"
            | "a"
            | "abbr"
            | "acronym"
            | "address"
            | "area"
            | "article"
            | "aside"
            | "audio"
            | "b"
            | "base"
            | "basefont"
            | "bdi"
            | "bdo"
            | "big"
            | "blockquote"
            | "body"
            | "br"
            | "button"
            | "canvas"
            | "caption"
            | "center"
            | "cite"
            | "code"
            | "col"
            | "colgroup"
            | "data"
            | "datalist"
            | "dd"
            | "del"
            | "details"
            | "dfn"
            | "dialog"
            | "div"
            | "dl"
            | "dt"
            | "em"
            | "embed"
            | "fieldset"
            | "figcaption"
            | "figure"
            | "footer"
            | "form"
            | "h1"
            | "h2"
            | "h3"
            | "h4"
            | "h5"
            | "h6"
            | "head"
            | "header"
            | "hgroup"
            | "hr"
            | "html"
            | "i"
            | "iframe"
            | "img"
            | "input"
            | "ins"
            | "kbd"
            | "label"
            | "legend"
            | "li"
            | "link"
            | "main"
            | "map"
            | "mark"
            | "math"
            | "menu"
            | "meta"
            | "meter"
            | "nav"
            | "noscript"
            | "object"
            | "ol"
            | "optgroup"
            | "option"
            | "output"
            | "p"
            | "param"
            | "picture"
            | "pre"
            | "progress"
            | "q"
            | "rp"
            | "rt"
            | "ruby"
            | "s"
            | "samp"
            | "script"
            | "search"
            | "section"
            | "select"
            | "slot"
            | "small"
            | "source"
            | "span"
            | "strong"
            | "style"
            | "sub"
            | "summary"
            | "sup"
            | "svg"
            | "table"
            | "tbody"
            | "td"
            | "template"
            | "textarea"
            | "tfoot"
            | "th"
            | "thead"
            | "time"
            | "title"
            | "tr"
            | "track"
            | "u"
            | "ul"
            | "var"
            | "video"
            | "wbr"
    )
}
589
/// Returns `true` for tags that never get a closing tag and are therefore not
/// pushed onto the open-tag stack (the void elements plus `!DOCTYPE`).
fn is_self_closing(tag: &str) -> bool {
    const NO_CLOSING_TAG: &[&str] = &[
        "!DOCTYPE", "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta",
        "param", "source", "track", "wbr",
    ];

    NO_CLOSING_TAG.contains(&tag)
}
609}