docext/
lib.rs

1use {
2    base64::Engine,
3    proc_macro::TokenStream,
4    proc_macro2::{Ident, Span},
5    quote::ToTokens,
6    regex::Regex,
7    std::{
8        collections::HashSet,
9        env,
10        fs,
11        path::{self, PathBuf},
12    },
13    syn::{
14        punctuated::Punctuated,
15        token::{Bracket, Eq, Pound},
16        AttrStyle,
17        Attribute,
18        Expr,
19        ExprLit,
20        ImplItem,
21        Item,
22        Lit,
23        LitStr,
24        Meta,
25        MetaNameValue,
26        Path,
27        PathArguments,
28        PathSegment,
29        TraitItem,
30    },
31    url::Url,
32};
33
34mod parser;
35
36// TODO:
37// - Remove the dependency on url and base64 and implement this manually instead
38// - Clean up the tests in example crate, make the explanations more clear.
39//   (I.e. what is expected vs. what is not expected maybe.) Probably put it all
40//   in one comment for easier checking. Or maybe put them each on separate
41//   methods.
42// - Add a feature to skip the image size check.
43
44#[proc_macro_attribute]
45pub fn docext(attr: TokenStream, item: TokenStream) -> TokenStream {
46    if !attr.is_empty() {
47        panic!("#[docext] attribute does not take any arguments");
48    }
49
50    // Try interpreting the input as a module item.
51    match syn::parse::<Item>(item).unwrap() {
52        Item::Const(mut c) => {
53            update_doc(&mut c.attrs);
54            c.to_token_stream().into()
55        }
56        Item::Enum(mut e) => {
57            update_doc(&mut e.attrs);
58            e.to_token_stream().into()
59        }
60        Item::ExternCrate(mut c) => {
61            update_doc(&mut c.attrs);
62            c.to_token_stream().into()
63        }
64        Item::Fn(mut f) => {
65            update_doc(&mut f.attrs);
66            f.to_token_stream().into()
67        }
68        Item::ForeignMod(mut m) => {
69            update_doc(&mut m.attrs);
70            m.to_token_stream().into()
71        }
72        Item::Impl(mut i) => {
73            update_doc(&mut i.attrs);
74            i.to_token_stream().into()
75        }
76        Item::Macro(mut m) => {
77            update_doc(&mut m.attrs);
78            m.to_token_stream().into()
79        }
80        Item::Mod(mut m) => {
81            update_doc(&mut m.attrs);
82            m.to_token_stream().into()
83        }
84        Item::Static(mut s) => {
85            update_doc(&mut s.attrs);
86            s.to_token_stream().into()
87        }
88        Item::Struct(mut s) => {
89            update_doc(&mut s.attrs);
90            s.to_token_stream().into()
91        }
92        Item::Trait(mut t) => {
93            update_doc(&mut t.attrs);
94            t.to_token_stream().into()
95        }
96        Item::TraitAlias(mut t) => {
97            update_doc(&mut t.attrs);
98            t.to_token_stream().into()
99        }
100        Item::Type(mut t) => {
101            update_doc(&mut t.attrs);
102            t.to_token_stream().into()
103        }
104        Item::Union(mut u) => {
105            update_doc(&mut u.attrs);
106            u.to_token_stream().into()
107        }
108        Item::Use(mut u) => {
109            update_doc(&mut u.attrs);
110            u.to_token_stream().into()
111        }
112        Item::Verbatim(v) => {
113            // Try interpreting the input as a trait item.
114            match syn::parse::<TraitItem>(v.into()).unwrap() {
115                TraitItem::Const(mut c) => {
116                    update_doc(&mut c.attrs);
117                    c.to_token_stream().into()
118                }
119                TraitItem::Fn(mut f) => {
120                    update_doc(&mut f.attrs);
121                    f.to_token_stream().into()
122                }
123                TraitItem::Type(mut t) => {
124                    update_doc(&mut t.attrs);
125                    t.to_token_stream().into()
126                }
127                TraitItem::Macro(mut m) => {
128                    update_doc(&mut m.attrs);
129                    m.to_token_stream().into()
130                }
131                TraitItem::Verbatim(v) => {
132                    // Try interpreting the input as an impl item.
133                    match syn::parse::<ImplItem>(v.into()).unwrap() {
134                        ImplItem::Const(mut c) => {
135                            update_doc(&mut c.attrs);
136                            c.to_token_stream().into()
137                        }
138                        ImplItem::Fn(mut f) => {
139                            update_doc(&mut f.attrs);
140                            f.to_token_stream().into()
141                        }
142                        ImplItem::Type(mut t) => {
143                            update_doc(&mut t.attrs);
144                            t.to_token_stream().into()
145                        }
146                        ImplItem::Macro(mut m) => {
147                            update_doc(&mut m.attrs);
148                            m.to_token_stream().into()
149                        }
150                        other => panic!("unsupported impl item type {other:#?}"),
151                    }
152                }
153                other => panic!("unsupported trait item type {other:#?}"),
154            }
155        }
156        other => panic!("unsupported item type {other:#?}"),
157    }
158}
159
160/// Update the doc comments with KaTeX syntax rendering and image support.
161fn update_doc(attrs: &mut Vec<Attribute>) {
162    // Error if there is no doc comment, since #[docext] wouldn't do anything useful
163    // in this case.
164    if !attrs.iter().any(|attr| {
165        let Ok(name_value) = attr.meta.require_name_value() else {
166            return false;
167        };
168        name_value.path.is_ident("doc") && name_value.path.segments.len() == 1
169    }) {
170        panic!("#[docext] only applies to items with doc comments");
171    }
172
173    // Remove doc comments from the attrs and collect them into a single string.
174    let mut doc = String::new();
175    *attrs = std::mem::take(attrs)
176        .into_iter()
177        .filter_map(|attr| {
178            let Ok(name_value) = attr.meta.require_name_value() else {
179                return Some(attr);
180            };
181            if !name_value.path.is_ident("doc") || name_value.path.segments.len() != 1 {
182                return Some(attr);
183            }
184
185            let Expr::Lit(ExprLit {
186                lit: Lit::Str(lit), ..
187            }) = &name_value.value
188            else {
189                return Some(attr);
190            };
191
192            doc.push_str(&lit.value());
193            doc.push('\n');
194            None
195        })
196        .collect();
197
198    // Markdown options used by rustdoc.
199    let opts = pulldown_cmark::Options::ENABLE_TABLES
200        | pulldown_cmark::Options::ENABLE_FOOTNOTES
201        | pulldown_cmark::Options::ENABLE_STRIKETHROUGH
202        | pulldown_cmark::Options::ENABLE_TASKLISTS
203        | pulldown_cmark::Options::ENABLE_SMART_PUNCTUATION;
204
205    // Spans of code blocks in the doc comment. These are needed to ensure that math
206    // is not rendered inside of markdown code blocks.
207    let mut code_sections = Vec::new();
208    // Collect code sections from doc comment.
209    for (ev, range) in pulldown_cmark::Parser::new_ext(&doc, opts).into_offset_iter() {
210        if let pulldown_cmark::Event::Code(..)
211        | pulldown_cmark::Event::Start(pulldown_cmark::Tag::CodeBlock(..)) = ev
212        {
213            code_sections.push(range);
214        }
215    }
216
217    // Regex matching ASCII punctuation characters (https://spec.commonmark.org/0.31.2/#ascii-punctuation-character).
218    let punctuation = Regex::new(
219        r##"(?<punct>[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;<\=>\?\@\[\\\]\^\_\`\{\|\}\~])"##,
220    )
221    .unwrap();
222
223    // Math sections in the doc comment, used to avoid encoding images as base64
224    // data if they happen to be inside of a math block.
225    let mut math_sections = Vec::new();
226
227    let mut doc: String = parser::parse_math(&doc)
228        .into_iter()
229        .map(|event| match event {
230            parser::Event::Text(text) => {
231                // Leave all regular markdown text unchanged.
232                text.to_owned()
233            }
234            parser::Event::Math(math, range)
235                if code_sections
236                    .iter()
237                    .any(|section| section.start <= range.start && range.end <= section.end) =>
238            {
239                // Math sections inside code blocks are not rendered by KaTeX. Don't escape
240                // punctuation, leave them unchanged.
241                math.to_owned()
242            }
243            parser::Event::Math(math, range) => {
244                if math.lines().any(|line| line.trim().is_empty()) {
245                    // The rustdoc markdown renderer interprets blank lines as starting a new
246                    // paragraph, which would break the math.
247                    panic!("blank lines in math blocks are not supported");
248                }
249                math_sections.push(range);
250                // Escape all punctuation characters. This is to ensure that the markdown
251                // renderer in rustdoc doesn't break the math. (Otherwise, for example starting
252                // a line with "-" (minus) in the math block would cause the
253                // markdown to render as a list and completely break the math,
254                // or for example writing $[a](b)$ would render as a link.)
255                punctuation.replace_all(math, r"\$punct").into_owned()
256            }
257        })
258        .collect();
259
260    // Paths to local images used in the doc comment.
261    let mut imgs = HashSet::new();
262    // Collect all images from the doc comment that are not URLs. These will be
263    // encoded as base64 data and inserted into the doc comment as HTML tags, to
264    // be loaded and rendered by an image rendering script.
265    for (ev, range) in pulldown_cmark::Parser::new_ext(&doc, opts).into_offset_iter() {
266        let pulldown_cmark::Event::Start(pulldown_cmark::Tag::Image {
267            dest_url: path_or_url,
268            ..
269        }) = ev
270        else {
271            // Not an image tag.
272            continue;
273        };
274        if Url::parse(&path_or_url).is_ok() {
275            // This is a URL, so it is not a local image.
276            continue;
277        }
278        if math_sections
279            .iter()
280            .any(|section| section.start <= range.start && range.end <= section.end)
281        {
282            // The image is inside a math block, so it should not be encoded as base64.
283            // This avoids the following (extreme) edge case: if there is a math block such
284            // as $![img](path/does/not_exist.png)$, docext would panic because it couldn't
285            // find the image at that path. The root cause of the issue is that the markdown
286            // parser does not recognize math blocks, so it interprets the math
287            // as a regular image tag.
288            continue;
289        }
290        imgs.insert(path_or_url.into_string());
291    }
292
293    // Add the KaTeX CSS and JS to the doc comment, enabling TeX rending. Add a
294    // rendering script which calls `renderMathInElement` on its parent, so
295    // that the TeX is only rendered in the doc comment, not the entire page.
296    doc.push_str(r#"
297<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.css" integrity="sha384-GvrOXuhMATgEsSwCs4smul74iXGOixntILdUW9XmUC6+HX0sLNAK3q71HotJqlAn" crossorigin="anonymous">
298<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.js" integrity="sha384-cpW21h6RZv/phavutF+AuVYrr+dA8xD9zs6FwLpaCct6O9ctzYFfFr4dgmgccOTx" crossorigin="anonymous"></script>
299<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/contrib/auto-render.min.js" integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05" crossorigin="anonymous"></script>
300<script>
301(function() {
302    var parent = document.currentScript.parentElement;
303    document.addEventListener("DOMContentLoaded", function() {
304        renderMathInElement(parent, {
305            delimiters:[
306                { left: "$$", right: "$$", display: true },
307                { left: "$", right: "$", display: false }
308            ]
309        })
310    });
311})()
312</script>"#);
313
314    // Encode all images as base64 data inside of span attributes. Later, a script
315    // will replace the src attributes of the images with the base64 data. This
316    // is done to facilitate high-quality IDE hovers, since putting the base64 data
317    // directly in the middle of the hover could result in bad UX.
318    for img in imgs.iter() {
319        // Load the image relative to the current crate.
320        let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
321        path.push(img);
322
323        // Ensure that the file is not too large, otherwise the compiler might crash.
324        let metadata = fs::metadata(&path).unwrap_or_else(|_| {
325            panic!(
326                r#"failed to stat image: "{}", is the file missing?"#,
327                path.to_string_lossy()
328            )
329        });
330        if metadata.len() > 1024 * 1024 {
331            panic!(
332                r#"image file too large: "{}", max size is 1MB"#,
333                path.to_string_lossy()
334            );
335        }
336
337        // Encode the image as base64.
338        let data = fs::read(&path).unwrap_or_else(|_| {
339            panic!(
340                r#"failed to read image: "{}", is the file missing?"#,
341                path.to_string_lossy()
342            )
343        });
344        let base64 = base64::engine::general_purpose::STANDARD.encode(&data);
345
346        // The data URL requires a MIME type.
347        let mime = mime(&path);
348
349        // Add a span containing the image data encoded as base64.
350        doc.push('\n');
351        doc.push_str(&format!(
352            r#"<span class="docext-img" data-src="{img}" data-img="data:{mime};base64,{base64}"></span>"#,
353        ));
354    }
355
356    if !imgs.is_empty() {
357        // Add the image rendering script.
358        doc.push_str(r#"
359<script>
360(function() {
361    var elem = document.currentScript.parentElement;
362    document.addEventListener("DOMContentLoaded", function() {
363        elem.querySelectorAll(".docext-img").forEach(function(e) {
364            elem.querySelectorAll("img[src='" + e.getAttribute("data-src") + "']").forEach(function(i) {
365                i.src = e.getAttribute("data-img");
366            });
367        });
368    });
369})()
370</script>"#);
371    }
372
373    // Create the modified doc attribute.
374    attrs.push(Attribute {
375        pound_token: Pound::default(),
376        style: AttrStyle::Outer,
377        bracket_token: Bracket::default(),
378        meta: Meta::NameValue(MetaNameValue {
379            path: Path {
380                leading_colon: None,
381                segments: Punctuated::from_iter([PathSegment {
382                    ident: Ident::new("doc", Span::call_site()),
383                    arguments: PathArguments::None,
384                }]),
385            },
386            eq_token: Eq::default(),
387            value: Expr::Lit(ExprLit {
388                attrs: Default::default(),
389                lit: Lit::Str(LitStr::new(&doc, Span::call_site())),
390            }),
391        }),
392    });
393}
394
/// Get the MIME type of the given image path based on its extension.
///
/// The extension is compared ASCII case-insensitively, so `logo.PNG` and
/// `logo.png` both map to `image/png`.
///
/// # Panics
///
/// Panics if the path has no extension, or if the extension is not one of the
/// image formats listed below.
fn mime(path: &path::Path) -> &'static str {
    let ext = path.extension().unwrap_or_else(|| {
        panic!(
            r#"image path has no extension: "{}""#,
            path.to_string_lossy()
        )
    });
    let ext = ext.to_string_lossy();
    // File extensions are conventionally case-insensitive; normalize before
    // matching so that e.g. "PNG" or "Jpg" are recognized as well.
    match ext.to_ascii_lowercase().as_str() {
        "apng" => "image/apng",
        "avif" => "image/avif",
        "gif" => "image/gif",
        "jpg" | "jpeg" | "jfif" | "pjpeg" | "pjp" => "image/jpeg",
        "png" => "image/png",
        "svg" => "image/svg+xml",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "ico" | "cur" => "image/x-icon",
        "tif" | "tiff" => "image/tiff",
        // Report the extension as written by the user, not the normalized form.
        _ => panic!(r#"unsupported image format: "{}""#, ext),
    }
}