Skip to main content

sourcey_rustdoc/
doctest.rs

1use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
2
3use crate::spec::Doctest;
4
/// Language tokens that explicitly mark a fence as *not* rust.
///
/// `ActiveFence::new` compares the lowercased first token of a fence's info
/// string against this list; on a match the fence is recorded as a foreign
/// language and never becomes a doctest.
const NON_RUST_LANGS: &[&str] = &[
    "text", "ignore-but-not-rust",
    "json", "toml", "yaml", "html",
    "bash", "sh", "shell", "console",
    "diff", "ini", "markdown", "md",
];
24
/// Fence tokens that are rustdoc attributes rather than language names.
///
/// When the lowercased first token of an info string is one of these,
/// `ActiveFence::new` treats the fence as implicitly rust and keeps *every*
/// token (including the first) as a fence attribute.
const KNOWN_FENCE_ATTRS: &[&str] = &[
    "ignore", "no_run", "should_panic", "compile_fail", "allow_fail", "standalone_crate",
    "edition2015", "edition2018", "edition2021", "edition2024",
];
39
40/// Parse the docstring markdown and return every code fence as a Doctest.
41/// Doctests in rustdoc are any ` ``` ` fence whose language token is empty, `rust`,
42/// or starts with `rust,`. We also surface non-rust fences so the renderer can
43/// label them; the caller filters on `lang`/`fence_attributes`.
44pub fn extract_doctests(docs_markdown: &str) -> Vec<Doctest> {
45    let mut out = Vec::new();
46    let parser = Parser::new(docs_markdown);
47    let mut active: Option<ActiveFence> = None;
48    let mut ordinal: u32 = 0;
49    for ev in parser {
50        match ev {
51            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
52                let info = info.into_string();
53                active = Some(ActiveFence::new(&info));
54            }
55            Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
56                active = Some(ActiveFence::default_rust());
57            }
58            Event::Text(text) => {
59                if let Some(fence) = active.as_mut() {
60                    fence.body.push_str(&text);
61                }
62            }
63            Event::End(TagEnd::CodeBlock) => {
64                if let Some(fence) = active.take() {
65                    if let Some(doctest) = fence.into_doctest(ordinal) {
66                        out.push(doctest);
67                        ordinal += 1;
68                    }
69                }
70            }
71            _ => {}
72        }
73    }
74    out
75}
76
/// A code block that is being accumulated while the markdown is parsed.
struct ActiveFence {
    /// Language label: `"rust"` for doctests, otherwise the first info-string
    /// token as written.
    lang: String,
    /// Info-string tokens kept as attributes (e.g. `no_run`).
    fence_attributes: Vec<String>,
    /// Raw text of the block collected so far.
    body: String,
    /// Whether the block should be turned into a doctest.
    is_rust: bool,
}
83
84impl ActiveFence {
85    fn new(info: &str) -> Self {
86        let parts: Vec<String> = info
87            .split(',')
88            .map(str::trim)
89            .filter(|s| !s.is_empty())
90            .map(|s| s.to_string())
91            .collect();
92        if parts.is_empty() {
93            return Self {
94                lang: "rust".to_string(),
95                fence_attributes: Vec::new(),
96                body: String::new(),
97                is_rust: true,
98            };
99        }
100        let first = &parts[0];
101        let first_lc = first.to_lowercase();
102        // Explicit non-rust language: not a doctest.
103        if NON_RUST_LANGS.contains(&first_lc.as_str()) {
104            return Self {
105                lang: first.clone(),
106                fence_attributes: parts[1..].to_vec(),
107                body: String::new(),
108                is_rust: false,
109            };
110        }
111        // Explicit `rust` lang: drop it, keep the rest as attributes.
112        if first_lc == "rust" {
113            return Self {
114                lang: "rust".to_string(),
115                fence_attributes: parts[1..].to_vec(),
116                body: String::new(),
117                is_rust: true,
118            };
119        }
120        // First token is a known rust attribute (e.g. `no_run`): all tokens
121        // are attributes; implicit rust lang.
122        if KNOWN_FENCE_ATTRS.contains(&first_lc.as_str()) {
123            return Self {
124                lang: "rust".to_string(),
125                fence_attributes: parts.clone(),
126                body: String::new(),
127                is_rust: true,
128            };
129        }
130        // Unknown first token (e.g. `mermaid`): treat as a foreign language,
131        // not a doctest.
132        Self {
133            lang: first.clone(),
134            fence_attributes: parts[1..].to_vec(),
135            body: String::new(),
136            is_rust: false,
137        }
138    }
139
140    fn default_rust() -> Self {
141        Self {
142            lang: "rust".to_string(),
143            fence_attributes: Vec::new(),
144            body: String::new(),
145            is_rust: true,
146        }
147    }
148
149    fn into_doctest(self, ordinal: u32) -> Option<Doctest> {
150        if !self.is_rust {
151            return None;
152        }
153        let (display_code, executable_code) = split_hidden_lines(&self.body);
154        let implicit_main_wrap = !contains_top_level_main(&executable_code);
155        Some(Doctest {
156            lang: self.lang,
157            fence_attributes: self.fence_attributes,
158            display_code,
159            executable_code,
160            implicit_main_wrap,
161            source: None,
162            ordinal,
163        })
164    }
165}
166
/// Split a doctest body into `(display, executable)` code.
///
/// Line handling, after ignoring leading whitespace:
///
/// * `# code` is hidden: only the executable code gets it, with the `# `
///   marker removed and the original indentation kept;
/// * a lone `#` is hidden and becomes an empty line in the executable code;
/// * `##...` is an escaped hash: both outputs get the line with one `#`
///   removed;
/// * every other line is copied to both outputs unchanged.
///
/// Each emitted line is terminated with `\n`.
fn split_hidden_lines(body: &str) -> (String, String) {
    let mut shown = String::new();
    let mut runnable = String::new();

    for raw in body.lines() {
        let content = raw.trim_start();
        let indent = &raw[..raw.len() - content.len()];

        if content == "#" {
            runnable.push('\n');
        } else if let Some(hidden) = content.strip_prefix("# ") {
            runnable.extend([indent, hidden, "\n"]);
        } else if let Some(tail) = content.strip_prefix("##") {
            for out in [&mut shown, &mut runnable] {
                out.extend([indent, "#", tail, "\n"]);
            }
        } else {
            for out in [&mut shown, &mut runnable] {
                out.extend([raw, "\n"]);
            }
        }
    }

    (shown, runnable)
}
202
/// Report whether `code` declares a `fn main` at brace depth zero.
///
/// The scan is line based: a line counts when it is reached at depth zero
/// and starts (after leading whitespace) with a `main` signature, optionally
/// preceded by a visibility (`pub`, `pub(crate)`, ...) and the qualifiers
/// `const`, `async` or `unsafe`. Depth is tracked by counting `{` and `}`
/// characters on every line.
///
/// This is a heuristic, not a parser: braces inside string literals or
/// comments are counted too.
fn contains_top_level_main(code: &str) -> bool {
    let mut depth: i32 = 0;
    for line in code.lines() {
        // The check uses the depth *before* this line's own braces.
        if depth == 0 && is_main_signature(line) {
            return true;
        }
        for ch in line.chars() {
            match ch {
                '{' => depth += 1,
                '}' => depth -= 1,
                _ => {}
            }
        }
    }
    false
}

/// True when `line` starts with `[visibility] [const|async|unsafe]* fn main`
/// followed by `(` or whitespace.
fn is_main_signature(line: &str) -> bool {
    let mut rest = line.trim_start();

    // Optional visibility: plain `pub` or restricted `pub(...)`.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        rest = match after_pub.strip_prefix('(') {
            Some(restricted) => match restricted.find(')') {
                Some(close) => restricted[close + 1..].trim_start(),
                None => return false,
            },
            // Plain `pub` must be a whole word (rejects e.g. `publish`).
            None if after_pub.starts_with(char::is_whitespace) => after_pub.trim_start(),
            None => return false,
        };
    }

    // Any number of function qualifiers.
    while let Some(after) = ["const", "async", "unsafe"]
        .iter()
        .find_map(|word| strip_leading_word(rest, *word))
    {
        rest = after;
    }

    match strip_leading_word(rest, "fn").and_then(|name| name.strip_prefix("main")) {
        // Reject longer identifiers such as `main_loop`.
        Some(tail) => tail.starts_with('(') || tail.starts_with(char::is_whitespace),
        None => false,
    }
}

/// Strip `word` plus the whitespace after it from the start of `text`.
/// Returns `None` unless `word` is followed by at least one whitespace char.
fn strip_leading_word<'a>(text: &'a str, word: &str) -> Option<&'a str> {
    let after = text.strip_prefix(word)?;
    if after.starts_with(char::is_whitespace) {
        Some(after.trim_start())
    } else {
        None
    }
}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// A fence with an empty info string is a rust doctest.
    #[test]
    fn extracts_rust_fences() {
        let found = extract_doctests("Example\n\n```\nlet x = 1;\n```\n");
        assert_eq!(found.len(), 1);
        let first = &found[0];
        assert!(first.display_code.contains("let x = 1;"));
    }

    /// A fence tagged `text` produces no doctest.
    #[test]
    fn skips_text_fences() {
        let found = extract_doctests("```text\nnot rust\n```\n");
        assert!(found.is_empty());
    }

    /// Tokens after `rust` are kept, in order, as fence attributes.
    #[test]
    fn captures_fence_attributes() {
        let found = extract_doctests("```rust,no_run,should_panic\nlet x = 1;\n```\n");
        let first = &found[0];
        assert_eq!(first.fence_attributes, vec!["no_run", "should_panic"]);
    }

    /// `# ` lines are hidden from the displayed code but still executed.
    #[test]
    fn strips_hidden_lines_from_display() {
        let found = extract_doctests("```\n# use foo::Bar;\nlet x = Bar::new();\n```\n");
        let first = &found[0];
        assert!(!first.display_code.contains("# use foo::Bar"));
        assert!(first.display_code.contains("let x = Bar::new();"));
        assert!(first.executable_code.contains("use foo::Bar;"));
    }

    /// A doctest that declares `fn main` is not wrapped implicitly.
    #[test]
    fn detects_explicit_main() {
        let found = extract_doctests("```\nfn main() {\n    let _ = 1;\n}\n```\n");
        let first = &found[0];
        assert!(!first.implicit_main_wrap);
    }

    /// A doctest without `fn main` is flagged for implicit wrapping.
    #[test]
    fn detects_implicit_main() {
        let found = extract_doctests("```\nlet x = 1;\nassert_eq!(x, 1);\n```\n");
        let first = &found[0];
        assert!(first.implicit_main_wrap);
    }
}