// cargo_rdme/transform/intralinks/mod.rs

1use crate::transform::DocTransform;
2use crate::transform::intralinks::links::{
3    Link, MarkdownInlineLink, MarkdownLink, MarkdownReferenceLink, markdown_link_iterator,
4    markdown_reference_link_definition_iterator,
5};
6use crate::transform::intralinks::rustdoc::{IntralinkResolver, create_intralink_resolver};
7use crate::{Doc, PackageTarget};
8use itertools::Itertools;
9use std::borrow::Cow;
10use std::collections::HashSet;
11use std::fmt::Display;
12use std::path::PathBuf;
13use thiserror::Error;
14use unicase::UniCase;
15
16mod links;
17mod rustdoc;
18
19#[derive(Error, Debug)]
20pub enum IntralinkError {
21    #[error("failed to run rustdoc: {error}")]
22    RustdocError {
23        #[source]
24        error: rustdoc_json::BuildError,
25    },
26    #[error("failed to run rustdoc:\n{stderr}")]
27    BuildRustdocError { stderr: String },
28    #[error("failed to read rustdoc json file: {io_error}")]
29    ReadRustdocError {
30        #[source]
31        io_error: std::io::Error,
32    },
33    #[error("failed to parse rustdoc json file: {serde_error}")]
34    ParseRustdocError { serde_error: serde_json::Error },
35    #[error("unsupported rustdoc format version {version} (expected version {expected_version})")]
36    UnsupportedRustdocFormatVersion { version: u32, expected_version: u32 },
37    #[error("rust toolchain not installed: {expected}")]
38    RustToolchainNotInstalled { expected: &'static str },
39    #[error("failed to run rustup toolchain: {error}")]
40    RustupToolchain { error: rustup_toolchain::Error },
41}
42
/// docs.rs related settings for generated intralink URLs.
///
/// Both fields are optional; `Default` leaves them unset.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IntralinksDocsRsConfig {
    /// Base URL override. NOTE(review): presumably replaces the default docs.rs host when
    /// building links; confirm against the resolver.
    pub docs_rs_base_url: Option<String>,
    /// Version override. NOTE(review): presumably the crate version segment used in the
    /// generated URLs; confirm against the resolver.
    pub docs_rs_version: Option<String>,
}

/// User-facing configuration of the intralinks transform.
///
/// Every option is optional; `Default` leaves all of them unset.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IntralinksConfig {
    /// docs.rs settings, handed to the intralink resolver.
    pub docs_rs: IntralinksDocsRsConfig,
    /// When `Some(true)` intralinks are stripped instead of resolved. The transform treats
    /// an unset value as `false`.
    pub strip_links: Option<bool>,
    /// NOTE(review): presumably enables all cargo features for the rustdoc run; confirm.
    pub all_features: Option<bool>,
    /// NOTE(review): presumably the cargo features enabled for the rustdoc run; confirm.
    pub features: Option<Vec<String>>,
    /// NOTE(review): presumably disables default cargo features for the rustdoc run; confirm.
    pub no_default_features: Option<bool>,
}
57
58pub struct DocTransformIntralinks<F> {
59    package_name: String,
60    package_target: PackageTarget,
61    workspace_package: Option<String>,
62    manifest_path: PathBuf,
63    emit_warning: F,
64    config: IntralinksConfig,
65}
66
67impl<F> DocTransformIntralinks<F>
68where
69    F: Fn(&str),
70{
71    pub fn new(
72        package_name: impl Into<String>,
73        package_target: PackageTarget,
74        workspace_package: Option<String>,
75        manifest_path: PathBuf,
76        emit_warning: F,
77        config: Option<IntralinksConfig>,
78    ) -> DocTransformIntralinks<F> {
79        DocTransformIntralinks {
80            package_name: package_name.into(),
81            package_target,
82            workspace_package,
83            manifest_path,
84            emit_warning,
85            config: config.unwrap_or_default(),
86        }
87    }
88}
89
/// A non-empty list of path segments that is either borrowed from a slice or owned.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ItemPath<'a> {
    segments: Cow<'a, [String]>,
}

impl<'a> ItemPath<'a> {
    /// Wraps `segments` without copying them.
    ///
    /// # Panics
    ///
    /// Panics if `segments` is empty.
    fn new(segments: &'a [String]) -> ItemPath<'a> {
        let has_segments = !segments.is_empty();

        assert!(has_segments, "path item must not be empty");

        ItemPath { segments: Cow::Borrowed(segments) }
    }

    /// Returns a new, owned path made of the current segments followed by `segment`.
    /// `self` is left untouched.
    fn add(&self, segment: String) -> ItemPath<'static> {
        let mut extended = Vec::with_capacity(self.segments.len() + 1);

        extended.extend_from_slice(&self.segments);
        extended.push(segment);

        ItemPath { segments: Cow::Owned(extended) }
    }

    /// Iterates over the segments, in order, as string slices.
    fn segments(&self) -> impl Iterator<Item = &str> {
        self.segments.iter().map(|segment| segment.as_str())
    }

    /// Number of segments in the path.
    fn len(&self) -> usize {
        self.segments.len()
    }
}
118
119impl Display for ItemPath<'_> {
120    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121        // TODO Use standard library intersperse() one it stabilizes (https://github.com/rust-lang/rust/issues/79524).
122        let iter = Itertools::intersperse(self.segments.iter().map(String::as_str), "::");
123
124        for s in iter {
125            f.write_str(s)?;
126        }
127
128        Ok(())
129    }
130}
131
132fn has_intralinks(doc: &Doc) -> bool {
133    let link_targets =
134        markdown_link_iterator(&doc.markdown).items().filter_map(|link| match link {
135            MarkdownLink::Inline { link } => Some(link.link),
136            MarkdownLink::Reference { link: MarkdownReferenceLink::Shortcut { text } }
137                if is_intralink_shortcut(text.as_str()) =>
138            {
139                Some(Link::new(text.as_str().to_owned()))
140            }
141            MarkdownLink::Reference { .. } => None,
142        });
143    let reference_links = markdown_reference_link_definition_iterator(&doc.markdown)
144        .items()
145        .map(|link_def| link_def.link);
146
147    link_targets.chain(reference_links).any(|link| IntralinkResolver::is_intralink(&link))
148}
149
150impl<F> DocTransform for DocTransformIntralinks<F>
151where
152    F: Fn(&str),
153{
154    type E = IntralinkError;
155
156    fn transform(&self, doc: &Doc) -> Result<Doc, IntralinkError> {
157        // If there are no intralinks return immediately. No need to run `rustdoc` at all.
158        if !has_intralinks(doc) {
159            return Ok(doc.clone());
160        }
161
162        let strip_links = self.config.strip_links.unwrap_or(false);
163
164        let intralink_resolver: IntralinkResolver<'_> = match strip_links {
165            true => {
166                // Create an empty resolver, since we are going to strip all intralinks.
167                IntralinkResolver::new(self.package_name.as_str(), &self.config.docs_rs)
168            }
169            false => create_intralink_resolver(
170                self.package_name.as_str(),
171                &self.package_target,
172                self.workspace_package.as_deref(),
173                &self.manifest_path,
174                &self.config,
175            )?,
176        };
177
178        let doc = rewrite_links(doc, &intralink_resolver, &self.emit_warning, &self.config);
179
180        Ok(doc)
181    }
182}
183
184fn rewrite_links(
185    doc: &Doc,
186    intralink_resolver: &IntralinkResolver,
187    emit_warning: &impl Fn(&str),
188    config: &IntralinksConfig,
189) -> Doc {
190    let RewriteReferenceLinksResult { doc, reference_links_to_remove } =
191        rewrite_reference_links_definitions(doc, intralink_resolver, emit_warning, config);
192
193    rewrite_markdown_links(
194        &doc,
195        intralink_resolver,
196        emit_warning,
197        config,
198        &reference_links_to_remove,
199    )
200}
201
202enum MarkdownLinkAction {
203    Link(Link),
204    Preserve,
205    Strip,
206}
207
/// Returns `text` wrapped in backticks, unless it already starts and ends with a distinct
/// pair of backticks, in which case it is returned unchanged.
///
/// A lone backtick does not count as a pair; inner backticks are left alone.
fn ensure_backticked(text: &str) -> String {
    // Stripping the prefix first guarantees the opening and closing backtick are two
    // different characters.
    let already_wrapped =
        text.strip_prefix('`').and_then(|rest| rest.strip_suffix('`')).is_some();

    if already_wrapped {
        text.to_owned()
    } else {
        format!("`{text}`")
    }
}
216
/// Decides whether the text of a shortcut link `[text]` looks like a Rust intralink.
///
/// Any one of these is enough:
///
/// - the text contains a path separator (`[foo::Bar]`);
/// - the text is wrapped in a pair of backticks (`` [`Foo`] ``);
/// - the text is non-empty and made only of alphanumeric characters and `_` (`[Foo]`).
///
/// The conditions overlap on purpose (backticked text usually wraps an identifier or a
/// path) and are checked one by one for readability.
///
/// The check is intentionally permissive: outside strip mode a false positive simply fails
/// to resolve and falls through, while in strip mode (where the resolver is empty) this is
/// the only signal available.
fn is_intralink_shortcut(text: &str) -> bool {
    if text.contains("::") {
        return true;
    }

    if text.len() >= 2 && text.starts_with('`') && text.ends_with('`') {
        return true;
    }

    !text.is_empty() && text.chars().all(|c| c == '_' || c.is_alphanumeric())
}
232
233fn resolve_shortcut_intralink(
234    link: &MarkdownReferenceLink,
235    intralink_resolver: &IntralinkResolver,
236    strip_links: bool,
237) -> Option<MarkdownLinkAction> {
238    let MarkdownReferenceLink::Shortcut { text } = link else {
239        return None;
240    };
241
242    if strip_links {
243        return is_intralink_shortcut(text.as_str()).then_some(MarkdownLinkAction::Strip);
244    }
245
246    let candidate = Link::new(text.as_str().to_owned());
247    let url = intralink_resolver.resolve_link(&candidate)?;
248
249    let url = match candidate.link_fragment() {
250        Some(fragment) if !url.contains('#') => format!("{url}#{fragment}"),
251        _ => url.to_owned(),
252    };
253
254    Some(MarkdownLinkAction::Link(url.into()))
255}
256
257fn markdown_link(
258    link: &Link,
259    intralink_resolver: &IntralinkResolver,
260    emit_warning: &impl Fn(&str),
261) -> MarkdownLinkAction {
262    assert!(IntralinkResolver::is_intralink(link));
263
264    match intralink_resolver.resolve_link(link) {
265        None => {
266            emit_warning(&format!("Could not resolve definition of `{}`.", link.symbol()));
267
268            MarkdownLinkAction::Strip
269        }
270        Some(url) => {
271            // For impl items the resolved URL already carries an implicit fragment (e.g.
272            // `#method.foo`), so we drop any user-supplied fragment to avoid emitting a URL with
273            // two `#`s.
274            let url = match link.link_fragment() {
275                Some(fragment) if !url.contains('#') => format!("{url}#{fragment}"),
276                _ => url.to_owned(),
277            };
278
279            MarkdownLinkAction::Link(url.into())
280        }
281    }
282}
283
284fn rewrite_markdown_links(
285    doc: &Doc,
286    intralink_resolver: &IntralinkResolver,
287    emit_warning: &impl Fn(&str),
288    config: &IntralinksConfig,
289    reference_links_to_remove: &HashSet<UniCase<String>>,
290) -> Doc {
291    use crate::utils::ItemOrOther;
292
293    let strip_links = config.strip_links.unwrap_or(false);
294    let mut new_doc = String::with_capacity(doc.as_string().len() + 1024);
295
296    for item_or_other in markdown_link_iterator(&doc.markdown).complete() {
297        match item_or_other {
298            ItemOrOther::Item(MarkdownLink::Inline { link: inline_link }) => {
299                let markdown_link: MarkdownLinkAction =
300                    match IntralinkResolver::is_intralink(&inline_link.link) {
301                        true => match strip_links {
302                            false => {
303                                markdown_link(&inline_link.link, intralink_resolver, emit_warning)
304                            }
305                            true => MarkdownLinkAction::Strip,
306                        },
307                        false => MarkdownLinkAction::Preserve,
308                    };
309
310                match markdown_link {
311                    MarkdownLinkAction::Link(markdown_link) => {
312                        new_doc.push_str(&inline_link.with_link(markdown_link).to_string());
313                    }
314                    MarkdownLinkAction::Preserve => {
315                        new_doc.push_str(&inline_link.to_string());
316                    }
317                    MarkdownLinkAction::Strip => {
318                        new_doc.push_str(&inline_link.text);
319                    }
320                }
321            }
322            ItemOrOther::Item(MarkdownLink::Reference { link }) => {
323                if reference_links_to_remove.contains(link.label()) {
324                    new_doc.push_str(link.text());
325                } else if let Some(action) =
326                    resolve_shortcut_intralink(&link, intralink_resolver, strip_links)
327                {
328                    // Shortcut intralinks render as code in rustdoc HTML; preserve that by
329                    // ensuring the link text is backticked in the README output.
330                    let backticked = ensure_backticked(link.text());
331
332                    match action {
333                        MarkdownLinkAction::Link(resolved) => {
334                            let inline = MarkdownInlineLink { text: backticked, link: resolved };
335
336                            new_doc.push_str(&inline.to_string());
337                        }
338                        MarkdownLinkAction::Strip => new_doc.push_str(&backticked),
339                        MarkdownLinkAction::Preserve => new_doc.push_str(&link.to_string()),
340                    }
341                } else {
342                    new_doc.push_str(&link.to_string());
343                }
344            }
345            ItemOrOther::Other(other) => {
346                new_doc.push_str(other);
347            }
348        }
349    }
350
351    Doc::from_str(new_doc)
352}
353
354struct RewriteReferenceLinksResult {
355    doc: Doc,
356    reference_links_to_remove: HashSet<UniCase<String>>,
357}
358
359fn rewrite_reference_links_definitions(
360    doc: &Doc,
361    intralink_resolver: &IntralinkResolver,
362    emit_warning: &impl Fn(&str),
363    config: &IntralinksConfig,
364) -> RewriteReferenceLinksResult {
365    use crate::utils::ItemOrOther;
366    let mut reference_links_to_remove = HashSet::new();
367    let mut new_doc = String::with_capacity(doc.as_string().len() + 1024);
368    let mut skip_next_newline = false;
369    let strip_links = config.strip_links.unwrap_or(false);
370
371    let iter = markdown_reference_link_definition_iterator(&doc.markdown);
372
373    for item_or_other in iter.complete() {
374        match item_or_other {
375            ItemOrOther::Item(link_ref_def) => {
376                let markdown_link: MarkdownLinkAction =
377                    match IntralinkResolver::is_intralink(&link_ref_def.link) {
378                        true => match strip_links {
379                            false => {
380                                markdown_link(&link_ref_def.link, intralink_resolver, emit_warning)
381                            }
382                            true => MarkdownLinkAction::Strip,
383                        },
384                        false => MarkdownLinkAction::Preserve,
385                    };
386
387                match markdown_link {
388                    MarkdownLinkAction::Link(link) => {
389                        new_doc.push_str(&link_ref_def.with_link(link).to_string());
390                    }
391                    MarkdownLinkAction::Preserve => {
392                        new_doc.push_str(&link_ref_def.to_string());
393                    }
394                    MarkdownLinkAction::Strip => {
395                        // Do not emit anything to new_doc.
396                        reference_links_to_remove.insert(link_ref_def.label);
397                        skip_next_newline = true;
398                    }
399                }
400            }
401            ItemOrOther::Other(other) => {
402                let other = match skip_next_newline {
403                    true => {
404                        skip_next_newline = false;
405                        let next_index = other
406                            .chars()
407                            .enumerate()
408                            .skip_while(|(_, c)| c.is_whitespace() && *c != '\n')
409                            .skip(1)
410                            .map(|(i, _)| i)
411                            .next();
412
413                        next_index.and_then(|i| other.get(i..)).unwrap_or("")
414                    }
415                    false => other,
416                };
417                new_doc.push_str(other);
418            }
419        }
420    }
421
422    RewriteReferenceLinksResult { doc: Doc::from_str(new_doc), reference_links_to_remove }
423}
424
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_ensure_backticked() {
        let cases = [
            ("Foo", "`Foo`"),
            ("foo::Bar", "`foo::Bar`"),
            ("`Foo`", "`Foo`"),
            ("`foo::Bar`", "`foo::Bar`"),
            ("", "``"),
            // A lone backtick is not a matched pair.
            ("`", "```"),
            // Already-paired empty backticks stay as is.
            ("``", "``"),
            // Inner backticks are not stripped.
            ("a`b", "`a`b`"),
        ];

        for (input, expected) in cases {
            assert_eq!(ensure_backticked(input), expected);
        }
    }

    #[test]
    fn test_is_intralink_shortcut() {
        let accepted = [
            // Bare identifiers.
            "Foo",
            "foo",
            "_foo",
            "Foo123",
            // Path-separated forms.
            "foo::Bar",
            "crate::foo::Bar",
            // Whitespace is fine as long as `::` is present.
            "a b::c",
            // Backticked forms.
            "`Foo`",
            "`foo::Bar`",
            "`Foo()`",
            "`Foo!`",
        ];

        for text in accepted {
            assert!(is_intralink_shortcut(text), "expected {text:?} to be accepted");
        }

        // Not intralink-shaped.
        let rejected = [
            "some text",
            "Foo!",
            "Foo()",
            "",
            // Unmatched backtick, no `::`, and the backtick rules out a bare identifier.
            "`Foo",
        ];

        for text in rejected {
            assert!(!is_intralink_shortcut(text), "expected {text:?} to be rejected");
        }
    }
}