Skip to main content

cargo_doc2readme/
output.rs

1use crate::{
2	diagnostic,
3	input::{InputFile, Scope, TargetType},
4	links::Links
5};
6use itertools::Itertools as _;
7use miette::{Context as _, IntoDiagnostic as _};
8use pulldown_cmark::{
9	BrokenLink, CodeBlockKind, CowStr, Event, HeadingLevel, LinkType, Options, Parser,
10	Tag, TagEnd
11};
12use semver::Version;
13use serde::Serialize;
14use std::{collections::BTreeMap, fmt::Write as _, io};
15use syn::Path;
16use url::Url;
17
/// Language tag given to code blocks that do not end up with a language of their own.
const DEFAULT_CODEBLOCK_LANG: &str = "rust";

/// Code block flags that rustdoc allows. They are removed from the language
/// string of fenced code blocks before the block is written to the readme.
#[rustfmt::skip]
const RUSTDOC_CODEBLOCK_FLAGS: &[&str] = &[
	"compile_fail",
	"edition2015", "edition2018", "edition2021", "edition2024",
	"ignore",
	"no_run",
	"should_panic"
];

/// Prefixes of rustdoc code block flags that carry a suffix (e.g. `ignore-<something>`).
/// Everything from such a prefix up to and including the next comma is removed from
/// the language string.
const RUSTDOC_CODEBLOCK_PREFIXES: &[&str] = &["ignore-"];
31
32pub struct ResolvedLink {
33	pub path: String,
34	pub link_type: Option<crate::input::LinkType>
35}
36
37impl Scope {
38	pub fn resolve(&self, crate_name: &str, path: String) -> ResolvedLink {
39		self.resolve_impl(crate_name, None, path)
40	}
41
42	pub fn resolve_impl(
43		&self,
44		crate_name: &str,
45		link_type: Option<crate::input::LinkType>,
46		path: String
47	) -> ResolvedLink {
48		if !path.starts_with("::") {
49			// split path into segments, ignoring <...> generics
50			let mut path = path.clone();
51			loop {
52				let idx = match (path.find('<'), path.rfind('>')) {
53					(Some(idx1), Some(idx2)) if idx1 < idx2 => idx1,
54					_ => break
55				};
56				let mut end = idx + 1;
57				let mut depth: usize = 1;
58				for ch in path[end ..].chars() {
59					if ch == '<' {
60						depth += 1;
61					} else if ch == '>' {
62						depth -= 1;
63					}
64					end += ch.len_utf8();
65
66					if depth == 0 {
67						break;
68					}
69				}
70				path.replace_range(idx .. end, "");
71			}
72			// debug!("Trying to resolve path {path:?}");
73			let mut segments = path.split("::").collect::<Vec<_>>();
74			if segments[0] == "crate" {
75				segments[0] = crate_name;
76			}
77
78			// check if we can resolve anything
79			if self.scope.contains_key(segments[0]) {
80				let paths = &self.scope[segments[0]];
81				if let Some((path0_link_type, path0)) = paths.front() {
82					segments[0] = path0;
83					let resolved_path = segments.join("::");
84					// debug!("Resolved path {path:?} to {resolved_path:?} as child of {path0:?} ({path0_link_type:?})");
85					if path0.starts_with("::") {
86						return ResolvedLink {
87							path: resolved_path,
88							link_type: if segments.len() == 1 {
89								Some(*path0_link_type)
90							} else {
91								link_type
92							}
93						};
94					}
95					return self.resolve(crate_name, resolved_path);
96				}
97			}
98		}
99
100		ResolvedLink { path, link_type }
101	}
102}
103
104fn broken_link_callback<'a>(lnk: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> {
105	Some(("".into(), lnk.reference.to_string().into()))
106}
107
108fn filter_hidden_rust_codeblock_lines(line: &str) -> Option<&str> {
109	match line.strip_prefix('#') {
110		Some(stripped_line) => match stripped_line.chars().next() {
111			// ignore "#" lines
112			None => None,
113			// replace "##" at the start of a line with "#"
114			Some('#') => Some(stripped_line),
115			// ignore lines starting with "#" if it is followed by whitespace
116			Some(c) if c.is_whitespace() => None,
117			// don't ignore the line if the "#" isn't followed by whitespace
118			Some(_) => Some(line)
119		},
120		None => Some(line)
121	}
122}
123
124struct EventFilter<'a, I: Iterator<Item = Event<'a>>> {
125	iter: I,
126	links: &'a mut BTreeMap<String, String>,
127
128	in_code_block: bool,
129	in_rust_code_block: bool,
130	link_idx: usize
131}
132
133impl<'a, I: Iterator<Item = Event<'a>>> EventFilter<'a, I> {
134	fn new(iter: I, links: &'a mut BTreeMap<String, String>) -> Self {
135		Self {
136			iter,
137			links,
138
139			in_code_block: false,
140			in_rust_code_block: false,
141			link_idx: 0
142		}
143	}
144}
145
146impl<'a, I: Iterator<Item = Event<'a>>> Iterator for EventFilter<'a, I> {
147	type Item = Event<'a>;
148
149	fn next(&mut self) -> Option<Self::Item> {
150		loop {
151			break Some(match self.iter.next()? {
152				Event::Start(tag) => Event::Start(match tag {
153					// we increase headings by 1 level
154					Tag::Heading {
155						level,
156						id,
157						classes,
158						attrs
159					} => {
160						let level = match level {
161							HeadingLevel::H1 => HeadingLevel::H2,
162							HeadingLevel::H2 => HeadingLevel::H3,
163							HeadingLevel::H3 => HeadingLevel::H4,
164							HeadingLevel::H4 => HeadingLevel::H5,
165							_ => HeadingLevel::H6
166						};
167						Tag::Heading {
168							level,
169							id,
170							classes,
171							attrs
172						}
173					},
174
175					// we record codeblocks and adjust their language
176					Tag::CodeBlock(kind) => {
177						debug_assert!(
178							!self.in_code_block,
179							"Recursive codeblocks, wtf???"
180						);
181						self.in_code_block = true;
182						Tag::CodeBlock(CodeBlockKind::Fenced(match kind {
183							CodeBlockKind::Indented => DEFAULT_CODEBLOCK_LANG.into(),
184							CodeBlockKind::Fenced(lang) => {
185								let mut lang: String = (*lang).to_owned();
186								for prefix in RUSTDOC_CODEBLOCK_PREFIXES {
187									while let Some(idx_start) = lang.find(prefix) {
188										let idx_off = idx_start + prefix.len();
189										match lang[idx_off ..].find(',') {
190											Some(idx_end) => lang.replace_range(
191												idx_start ..= idx_off + idx_end,
192												""
193											),
194											None => lang.replace_range(idx_start .., "")
195										}
196									}
197								}
198								for flag in RUSTDOC_CODEBLOCK_FLAGS {
199									lang = lang.replace(flag, "");
200								}
201								let mut lang: CowStr<'_> = lang.replace(',', "").into();
202								if lang.is_empty() {
203									lang = DEFAULT_CODEBLOCK_LANG.into();
204								}
205								self.in_rust_code_block =
206									&*lang == DEFAULT_CODEBLOCK_LANG;
207								lang
208							}
209						}))
210					},
211
212					Tag::Link {
213						link_type,
214						dest_url,
215						title,
216						id
217					} if dest_url.starts_with('#')
218						|| link_type == LinkType::Autolink
219						|| link_type == LinkType::Email =>
220					{
221						Tag::Link {
222							link_type,
223							dest_url,
224							title,
225							id
226						}
227					},
228					Tag::Link {
229						dest_url,
230						title,
231						id,
232						link_type
233					} => {
234						let link = format!("__link{}", self.link_idx);
235						self.link_idx += 1;
236						if !dest_url.is_empty() {
237							self.links.insert(link.clone(), dest_url.to_string());
238						} else if !id.is_empty() {
239							self.links.insert(link.clone(), id.to_string());
240						} else if !title.is_empty() {
241							self.links.insert(link.clone(), title.to_string());
242						} else {
243							break Some(Event::Start(Tag::Link {
244								link_type,
245								dest_url,
246								title,
247								id
248							}));
249						}
250						Tag::Link {
251							// pulldown-cmark-to-cmark does not support outputting
252							// unresolved reference-style links so we have to do
253							// it this stupid way
254							link_type: LinkType::Inline,
255							dest_url: link.into(),
256							title: "".into(),
257							id
258						}
259					},
260
261					// we don't need to modify any other tags
262					tag => tag
263				}),
264
265				Event::End(tag) => Event::End(match tag {
266					// we record when a codeblock ends
267					TagEnd::CodeBlock => {
268						debug_assert!(
269							self.in_code_block,
270							"Ending non-started code block, wtf???"
271						);
272						self.in_code_block = false;
273						self.in_rust_code_block = false;
274						TagEnd::CodeBlock
275					},
276					// we don't need to modify any other tags
277					tag => tag
278				}),
279
280				Event::Text(text) if self.in_code_block && self.in_rust_code_block => {
281					let mut filtered = text
282						.lines()
283						.filter_map(|line| filter_hidden_rust_codeblock_lines(line))
284						.join("\n");
285					if filtered.is_empty() {
286						continue;
287					}
288					if text.ends_with('\n') {
289						filtered.push('\n');
290					}
291					Event::Text(filtered.into())
292				},
293
294				ev => ev
295			});
296		}
297	}
298}
299
300struct Readme<'a> {
301	template: &'a str,
302	builtin_template: bool,
303	input: &'a InputFile,
304
305	/// Holds the main markdown part of the readme that was created from the rustdoc,
306	/// but does not include any parts of the template or the links.
307	readme: String,
308
309	/// Holds the link part of the markdown.
310	readme_links: String,
311
312	links: BTreeMap<String, String>
313}
314
315impl<'a> Readme<'a> {
316	fn new(template: &'a str, builtin_template: bool, input: &'a InputFile) -> Self {
317		Self {
318			template,
319			builtin_template,
320			input,
321			readme: String::new(),
322			readme_links: String::new(),
323			links: BTreeMap::new()
324		}
325	}
326
327	fn write_markdown(&mut self) -> Result<(), pulldown_cmark_to_cmark::Error> {
328		// we need this broken link callback for the purpose of broken links being parsed as links
329		let mut broken_link_callback = broken_link_callback;
330		let parser = Parser::new_with_broken_link_callback(
331			&self.input.rustdoc,
332			Options::all(),
333			Some(&mut broken_link_callback)
334		);
335
336		let options = pulldown_cmark_to_cmark::Options {
337			code_block_token_count: 3,
338			..Default::default()
339		};
340		pulldown_cmark_to_cmark::cmark_with_options(
341			EventFilter::new(parser.into_iter(), &mut self.links),
342			&mut self.readme,
343			options
344		)?;
345
346		// we need to replace the links generated by pulldown-cmark-to-cmark with
347		// reference-style links
348		let mut i = 0;
349		while i < self.readme.len() {
350			let Some(idx) = self.readme[i ..].find("(__link") else {
351				break;
352			};
353			let idx = idx + i;
354			let Some(idx2) = self.readme[idx ..].find(')') else {
355				break;
356			};
357			let idx2 = idx2 + idx;
358			i = idx2;
359
360			self.readme.replace_range(idx ..= idx, "[");
361			self.readme.replace_range(idx2 ..= idx2, "]");
362		}
363
364		if !self.readme.ends_with('\n') {
365			self.readme.push('\n');
366		}
367
368		Ok(())
369	}
370
371	fn write_links(&mut self) {
372		let mut links =
373			Links::new(self.template, self.builtin_template, &self.input.rustdoc);
374		for link in self.links.keys().cloned().collect::<Vec<_>>() {
375			let mut href = self.links[&link].to_owned();
376			if href.starts_with('`') && href.ends_with('`') {
377				href = href[1 .. href.len() - 1].to_owned();
378			}
379			let href = self.input.scope.resolve(&self.input.crate_name, href);
380
381			// apply sanitation: If the link looks like a resolved link (i.e. it starts
382			// with `::`), and it ends with `()` for functions or `!` for macros, we
383			// remove that last token
384			let mut href_path = href.path;
385			if href_path.starts_with("::") {
386				if href_path.ends_with("()") {
387					href_path.truncate(href_path.len() - 2);
388				} else if href_path.ends_with("!") {
389					href_path.truncate(href_path.len() - 1);
390				}
391			}
392
393			if let Ok(path) = syn::parse_str::<Path>(&href_path) {
394				self.links
395					.insert(link, links.build_link(&path, href.link_type, self.input));
396			} else {
397				// debug!("Unable to parse {href_path:?} as syn::Path, not modifying link");
398			}
399		}
400
401		if !links.deps.is_empty() {
402			writeln!(
403				self.readme_links,
404				" [__cargo_doc2readme_dependencies_info]: {}",
405				links.deps.encode()
406			)
407			.unwrap();
408		}
409		for (name, href) in &self.links {
410			// unwrap: writing to a String never fails
411			writeln!(self.readme_links, " [{}]: {}", name, href).unwrap();
412		}
413	}
414}
415
416/// This struct documents all available placeholders in the readme template.
417#[derive(Serialize)]
418pub struct TemplateContext<'a> {
419	/// The name of the crate.
420	///
421	/// This is renamed to `crate` for the template, do not use ` {{ krate }}`,
422	/// it won't work.
423	#[serde(rename = "crate")]
424	pub krate: &'a str,
425
426	/// The version of the crate.
427	///
428	/// This is renamed to `crate_version` for the template, do not use `{{ krate_version }}`,
429	/// it won't work.
430	#[serde(rename = "crate_version")]
431	pub krate_version: &'a str,
432
433	/// The target type, etiher `bin` or `lib`.
434	pub target: TargetType,
435
436	/// The `repository` variable from the `Cargo.toml` file, if set.
437	pub repository: Option<&'a str>,
438	/// The HTTP(S) host name of the `repository` variable, if set. This can be used to
439	/// add a [Codeberg] icon if the code is hosted on [Codeberg], for example.
440	///
441	///  [Codeberg]: https://codeberg.org
442	pub repository_host: Option<String>,
443
444	/// The `license` variable from the `Cargo.toml` file, if set. Note that in case only
445	/// the `license-file` variable is set, this will be none.
446	pub license: Option<&'a str>,
447
448	/// The `rust_version` variable from the `Cargo.toml` file, interpreted as a semver
449	/// [Version]. This means that, if the `Cargo.toml` specifies `1.90`, this variable
450	/// will read `1.90.0`.
451	pub rust_version: Option<&'a Version>,
452
453	/// The main content of the readme. Every template should contain this variable
454	/// exactly once. Place it whever it is convenient.
455	pub readme: String,
456
457	/// The links section of the readme. Place this at the bottom of the readme. You may
458	/// omit this if it is empty.
459	pub links: String
460}
461
462pub fn emit<W: io::Write>(
463	input: &InputFile,
464	template_filename: &str,
465	template: &str,
466	builtin_template: bool,
467	out_file: W
468) -> miette::Result<()> {
469	let mut readme = Readme::new(template, builtin_template, input);
470
471	// unwrap: This will never fail since we're only writing to a String.
472	// it is just inconvenient to write .unwrap() behind every single write!() invocation
473	readme.write_markdown().unwrap();
474
475	readme.write_links();
476
477	let repository = input.repository.as_deref();
478	let ctx = TemplateContext {
479		krate: &input.crate_name,
480		krate_version: &format!("{}", input.crate_version),
481		target: input.target_type,
482		repository,
483		repository_host: repository.and_then(|repo| {
484			let url = Url::parse(repo).ok();
485			url.as_ref()
486				.and_then(|url| url.host_str())
487				.map(String::from)
488		}),
489		license: input.license.as_deref(),
490		rust_version: input.rust_version.as_ref(),
491		readme: readme.readme,
492		links: readme.readme_links
493	};
494
495	let mut env = minijinja::Environment::new();
496	env.add_template(template_filename, template)
497		.map_err(|err| {
498			diagnostic::SyntaxError::new_jinja(template_filename, template, err)
499		})?;
500	env.get_template(template_filename)
501		.unwrap()
502		.render_to_write(ctx, out_file)
503		.into_diagnostic()
504		.context("Failed to render template")?;
505
506	Ok(())
507}