sourcey_rustdoc/
doctest.rs1use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
2
3use crate::spec::Doctest;
4
/// Fence language tags that are explicitly recognised as *not* Rust.
///
/// `ActiveFence::new` compares the lower-cased first token of a fence's info
/// string against this list; a match marks the block as non-Rust, so no
/// doctest is produced for it.
const NON_RUST_LANGS: &[&str] = &[
    // Plain text and the explicit opt-out tag.
    "text",
    "ignore-but-not-rust",
    // Data and markup formats.
    "json",
    "toml",
    "yaml",
    "html",
    // Shell sessions and scripts.
    "bash",
    "sh",
    "shell",
    "console",
    // Everything else.
    "diff",
    "ini",
    "markdown",
    "md",
];
24
/// Fence attributes that imply a Rust block when they appear first.
///
/// When the lower-cased first token of an info string is one of these,
/// `ActiveFence::new` treats the block as Rust and keeps *every* token
/// (including the first) as a fence attribute.
const KNOWN_FENCE_ATTRS: &[&str] = &[
    // Execution-mode attributes.
    "ignore",
    "no_run",
    "should_panic",
    "compile_fail",
    "allow_fail",
    "standalone_crate",
    // Edition selectors.
    "edition2015",
    "edition2018",
    "edition2021",
    "edition2024",
];
39
40pub fn extract_doctests(docs_markdown: &str) -> Vec<Doctest> {
45 let mut out = Vec::new();
46 let parser = Parser::new(docs_markdown);
47 let mut active: Option<ActiveFence> = None;
48 let mut ordinal: u32 = 0;
49 for ev in parser {
50 match ev {
51 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
52 let info = info.into_string();
53 active = Some(ActiveFence::new(&info));
54 }
55 Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
56 active = Some(ActiveFence::default_rust());
57 }
58 Event::Text(text) => {
59 if let Some(fence) = active.as_mut() {
60 fence.body.push_str(&text);
61 }
62 }
63 Event::End(TagEnd::CodeBlock) => {
64 if let Some(fence) = active.take() {
65 if let Some(doctest) = fence.into_doctest(ordinal) {
66 out.push(doctest);
67 ordinal += 1;
68 }
69 }
70 }
71 _ => {}
72 }
73 }
74 out
75}
76
/// State for the code block that is currently being read from the markdown
/// event stream.
struct ActiveFence {
    /// Language recorded for the block: `"rust"` for Rust blocks, otherwise
    /// the first info-string token as written.
    lang: String,
    /// Info-string tokens kept as attributes (for example `no_run`).
    fence_attributes: Vec<String>,
    /// Source text of the block, accumulated from the parser's text events.
    body: String,
    /// Whether the block is Rust; only Rust blocks are turned into doctests.
    is_rust: bool,
}
83
84impl ActiveFence {
85 fn new(info: &str) -> Self {
86 let parts: Vec<String> = info
87 .split(',')
88 .map(str::trim)
89 .filter(|s| !s.is_empty())
90 .map(|s| s.to_string())
91 .collect();
92 if parts.is_empty() {
93 return Self {
94 lang: "rust".to_string(),
95 fence_attributes: Vec::new(),
96 body: String::new(),
97 is_rust: true,
98 };
99 }
100 let first = &parts[0];
101 let first_lc = first.to_lowercase();
102 if NON_RUST_LANGS.contains(&first_lc.as_str()) {
104 return Self {
105 lang: first.clone(),
106 fence_attributes: parts[1..].to_vec(),
107 body: String::new(),
108 is_rust: false,
109 };
110 }
111 if first_lc == "rust" {
113 return Self {
114 lang: "rust".to_string(),
115 fence_attributes: parts[1..].to_vec(),
116 body: String::new(),
117 is_rust: true,
118 };
119 }
120 if KNOWN_FENCE_ATTRS.contains(&first_lc.as_str()) {
123 return Self {
124 lang: "rust".to_string(),
125 fence_attributes: parts.clone(),
126 body: String::new(),
127 is_rust: true,
128 };
129 }
130 Self {
133 lang: first.clone(),
134 fence_attributes: parts[1..].to_vec(),
135 body: String::new(),
136 is_rust: false,
137 }
138 }
139
140 fn default_rust() -> Self {
141 Self {
142 lang: "rust".to_string(),
143 fence_attributes: Vec::new(),
144 body: String::new(),
145 is_rust: true,
146 }
147 }
148
149 fn into_doctest(self, ordinal: u32) -> Option<Doctest> {
150 if !self.is_rust {
151 return None;
152 }
153 let (display_code, executable_code) = split_hidden_lines(&self.body);
154 let implicit_main_wrap = !contains_top_level_main(&executable_code);
155 Some(Doctest {
156 lang: self.lang,
157 fence_attributes: self.fence_attributes,
158 display_code,
159 executable_code,
160 implicit_main_wrap,
161 source: None,
162 ordinal,
163 })
164 }
165}
166
/// Splits a doctest body into `(display, executable)` source text.
///
/// Each line is classified by its content after leading whitespace:
///
/// * `# code` is hidden: omitted from the display text, and added to the
///   executable text with the `# ` marker removed and the indentation kept;
/// * a lone `#` is hidden and contributes an empty line to the executable
///   text;
/// * `##...` is an escaped hash: both outputs get the line with one `#`
///   removed;
/// * every other line is copied to both outputs unchanged.
///
/// Every emitted line ends with `\n`, whether or not the input did.
fn split_hidden_lines(body: &str) -> (String, String) {
    let mut shown = String::new();
    let mut runnable = String::new();

    for raw in body.lines() {
        let content = raw.trim_start();
        let indent = &raw[..raw.len() - content.len()];

        match content.strip_prefix("# ") {
            Some(hidden) => {
                runnable.push_str(indent);
                runnable.push_str(hidden);
            }
            // A bare marker hides an empty line; only the newline below is kept.
            None if content == "#" => {}
            None => {
                for out in [&mut shown, &mut runnable] {
                    match content.strip_prefix("##") {
                        Some(tail) => {
                            out.push_str(indent);
                            out.push('#');
                            out.push_str(tail);
                        }
                        None => out.push_str(raw),
                    }
                }
                shown.push('\n');
            }
        }
        // Every input line, hidden or not, yields one executable line.
        runnable.push('\n');
    }

    (shown, runnable)
}
202
/// Reports whether `code` declares `fn main` at brace depth zero.
///
/// The check is a line-based heuristic, not a parse. Each line is tested for
/// a `main` declaration while the running brace depth is zero, then the
/// line's braces are added to the depth. Braces inside string literals, char
/// literals and `//` comments are ignored so they cannot hide a later
/// top-level `main`. String literals spanning several lines and `/* */`
/// comments are not tracked.
fn contains_top_level_main(code: &str) -> bool {
    let mut depth: i32 = 0;
    for line in code.lines() {
        if depth == 0 && declares_main(line.trim_start()) {
            return true;
        }
        depth += brace_delta(line);
    }
    false
}

/// Returns `true` when `item` starts with a declaration of a function named
/// `main`, optionally preceded by `pub`, `pub(...)`, `async`, `unsafe` or
/// `const`.
fn declares_main(item: &str) -> bool {
    let mut words = item.split_whitespace().skip_while(|word| {
        matches!(*word, "pub" | "async" | "unsafe" | "const") || word.starts_with("pub(")
    });
    if words.next() != Some("fn") {
        return false;
    }
    // The name must be exactly `main`: either a token of its own or directly
    // followed by the parameter list, so `main_helper` does not match.
    match words.next().and_then(|name| name.strip_prefix("main")) {
        Some(rest) => rest.is_empty() || rest.starts_with('('),
        None => false,
    }
}

/// Net change in brace depth contributed by one line: `{` counts +1 and `}`
/// counts -1, skipping braces in string literals, char literals and anything
/// after `//`.
fn brace_delta(line: &str) -> i32 {
    let mut delta = 0;
    let mut in_string = false;
    let mut chars = line.chars();

    while let Some(ch) = chars.next() {
        if in_string {
            match ch {
                // Skip the escaped character so `\"` does not end the string.
                '\\' => {
                    chars.next();
                }
                '"' => in_string = false,
                _ => {}
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '/' if chars.clone().next() == Some('/') => break,
            '\'' => {
                // Tell a char literal ('x', '\n', '\u{7b}') from a lifetime
                // or label ('a): only a literal has a closing quote right
                // after its single (possibly escaped) character.
                let mut ahead = chars.clone();
                let is_char_literal = match ahead.next() {
                    Some('\\') => {
                        ahead.next();
                        for c in ahead.by_ref() {
                            if c == '\'' {
                                break;
                            }
                        }
                        true
                    }
                    Some(_) => ahead.next() == Some('\''),
                    None => false,
                };
                if is_char_literal {
                    chars = ahead;
                }
            }
            '{' => delta += 1,
            '}' => delta -= 1,
            _ => {}
        }
    }

    delta
}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// A fence without an info string is extracted as a Rust doctest.
    #[test]
    fn extracts_rust_fences() {
        let found = extract_doctests("Example\n\n```\nlet x = 1;\n```\n");
        assert_eq!(found.len(), 1);
        assert!(found[0].display_code.contains("let x = 1;"));
    }

    /// A `text` fence produces no doctest.
    #[test]
    fn skips_text_fences() {
        let found = extract_doctests("```text\nnot rust\n```\n");
        assert!(found.is_empty());
    }

    /// Tokens after `rust` are recorded as fence attributes, in order.
    #[test]
    fn captures_fence_attributes() {
        let found = extract_doctests("```rust,no_run,should_panic\nlet x = 1;\n```\n");
        assert_eq!(found[0].fence_attributes, vec!["no_run", "should_panic"]);
    }

    /// `# `-prefixed lines are hidden from display but kept for execution.
    #[test]
    fn strips_hidden_lines_from_display() {
        let found = extract_doctests("```\n# use foo::Bar;\nlet x = Bar::new();\n```\n");
        let doctest = &found[0];
        assert!(!doctest.display_code.contains("# use foo::Bar"));
        assert!(doctest.display_code.contains("let x = Bar::new();"));
        assert!(doctest.executable_code.contains("use foo::Bar;"));
    }

    /// A block that defines `fn main` does not need an implicit wrapper.
    #[test]
    fn detects_explicit_main() {
        let found = extract_doctests("```\nfn main() {\n let _ = 1;\n}\n```\n");
        assert!(!found[0].implicit_main_wrap);
    }

    /// A block of bare statements is flagged for implicit `main` wrapping.
    #[test]
    fn detects_implicit_main() {
        let found = extract_doctests("```\nlet x = 1;\nassert_eq!(x, 1);\n```\n");
        assert!(found[0].implicit_main_wrap);
    }
}