Skip to main content

cargo_rdme/transform/intralinks/
mod.rs

1use crate::transform::DocTransform;
2use crate::transform::intralinks::links::{
3    Link, MarkdownInlineLink, MarkdownLink, MarkdownReferenceLink, markdown_link_iterator,
4    markdown_reference_link_definition_iterator,
5};
6pub use crate::transform::intralinks::rustdoc::{
7    EXPECTED_RUST_TOOLCHAIN, install_expected_rust_toolchain, is_expected_rust_toolchain_installed,
8};
9use crate::transform::intralinks::rustdoc::{IntralinkResolver, create_intralink_resolver};
10use crate::{Doc, PackageTarget};
11use itertools::Itertools;
12use std::borrow::Cow;
13use std::collections::HashSet;
14use std::fmt::Display;
15use std::path::PathBuf;
16use thiserror::Error;
17use unicase::UniCase;
18
19mod links;
20mod rustdoc;
21
22#[derive(Error, Debug)]
23pub enum IntralinkError {
24    #[error("failed to run rustdoc: {error}")]
25    RustdocError {
26        #[source]
27        error: rustdoc_json::BuildError,
28    },
29    #[error("failed to run rustdoc:\n{stderr}")]
30    BuildRustdocError { stderr: String },
31    #[error("failed to read rustdoc json file: {io_error}")]
32    ReadRustdocError {
33        #[source]
34        io_error: std::io::Error,
35    },
36    #[error("failed to parse rustdoc json file: {serde_error}")]
37    ParseRustdocError { serde_error: serde_json::Error },
38    #[error("unsupported rustdoc format version {version} (expected version {expected_version})")]
39    UnsupportedRustdocFormatVersion { version: u32, expected_version: u32 },
40    #[error(
41        "rust toolchain not installed: {expected}\n\n\
42         `cargo-rdme` needs {expected} to do intralink resolution. To install it run:\n\n    \
43         rustup toolchain install {expected}\n\n\
44         or, equivalently, run `cargo rdme install-rust-toolchain-for-intralinks`."
45    )]
46    RustToolchainNotInstalled { expected: &'static str },
47    #[error("failed to run rustup toolchain: {error}")]
48    RustupToolchain { error: rustup_toolchain::Error },
49}
50
/// docs.rs-related settings for intralink resolution.
///
/// A reference to this value is handed to `IntralinkResolver::new`; how the resolver interprets
/// the individual fields is not visible in this module.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct IntralinksDocsRsConfig {
    /// Presumably overrides the base URL used for generated docs.rs links — TODO confirm against
    /// the resolver.
    pub docs_rs_base_url: Option<String>,
    /// Presumably pins the crate version used in generated docs.rs links — TODO confirm against
    /// the resolver.
    pub docs_rs_version: Option<String>,
}
56
/// User-facing configuration of the intralinks transform.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct IntralinksConfig {
    /// docs.rs settings passed on to the intralink resolver.
    pub docs_rs: IntralinksDocsRsConfig,
    /// When `Some(true)`, intralinks are stripped instead of being resolved, and `rustdoc` is not
    /// consulted. `None` is treated as `false`.
    pub strip_links: Option<bool>,
    /// NOTE(review): not read in this module; presumably forwarded to the rustdoc invocation
    /// together with the rest of the config — confirm.
    pub all_features: Option<bool>,
    /// NOTE(review): not read in this module; presumably the feature list used when running
    /// rustdoc — confirm.
    pub features: Option<Vec<String>>,
    /// NOTE(review): not read in this module; presumably disables default features when running
    /// rustdoc — confirm.
    pub no_default_features: Option<bool>,
}
65
/// Transform that rewrites (or strips) intralinks found in a [`Doc`].
///
/// `F` is the warning sink: it is called with a human-readable message whenever an intralink
/// cannot be resolved.
pub struct DocTransformIntralinks<F> {
    /// Package name handed to the intralink resolver.
    package_name: String,
    /// Package target handed to `create_intralink_resolver`.
    package_target: PackageTarget,
    /// Optional workspace package handed to `create_intralink_resolver`.
    workspace_package: Option<String>,
    /// Manifest path handed to `create_intralink_resolver`.
    manifest_path: PathBuf,
    /// Callback used to report warnings.
    emit_warning: F,
    /// Effective configuration (the default one when none was supplied to `new`).
    config: IntralinksConfig,
}
74
75impl<F> DocTransformIntralinks<F>
76where
77    F: Fn(&str),
78{
79    pub fn new(
80        package_name: impl Into<String>,
81        package_target: PackageTarget,
82        workspace_package: Option<String>,
83        manifest_path: PathBuf,
84        emit_warning: F,
85        config: Option<IntralinksConfig>,
86    ) -> DocTransformIntralinks<F> {
87        DocTransformIntralinks {
88            package_name: package_name.into(),
89            package_target,
90            workspace_package,
91            manifest_path,
92            emit_warning,
93            config: config.unwrap_or_default(),
94        }
95    }
96}
97
/// A non-empty sequence of path segments, e.g. the pieces of `foo::bar::Baz`.
///
/// The segments are either borrowed from the caller or owned, depending on how the value was
/// built.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
struct ItemPath<'a> {
    segments: Cow<'a, [String]>,
}

impl<'a> ItemPath<'a> {
    /// Wraps the borrowed `segments`.
    ///
    /// # Panics
    ///
    /// Panics if `segments` is empty.
    fn new(segments: &'a [String]) -> ItemPath<'a> {
        assert!(!segments.is_empty(), "path item must not be empty");

        Self { segments: Cow::Borrowed(segments) }
    }

    /// Returns a new, fully owned path made of this path's segments followed by `segment`.
    /// `self` is left untouched.
    fn add(&self, segment: String) -> ItemPath<'static> {
        let mut extended: Vec<String> = Vec::with_capacity(self.segments.len() + 1);

        extended.extend(self.segments.iter().cloned());
        extended.push(segment);

        ItemPath { segments: Cow::Owned(extended) }
    }

    /// Iterates over the segments, in order, as string slices.
    fn segments(&self) -> impl Iterator<Item = &str> {
        self.segments.iter().map(|segment| segment.as_str())
    }

    /// Number of segments in the path.
    fn len(&self) -> usize {
        self.segments.as_ref().len()
    }
}
126
127impl Display for ItemPath<'_> {
128    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129        // TODO Use standard library intersperse() one it stabilizes (https://github.com/rust-lang/rust/issues/79524).
130        let iter = Itertools::intersperse(self.segments.iter().map(String::as_str), "::");
131
132        for s in iter {
133            f.write_str(s)?;
134        }
135
136        Ok(())
137    }
138}
139
140fn has_intralinks(doc: &Doc) -> bool {
141    let link_targets =
142        markdown_link_iterator(&doc.markdown).items().filter_map(|link| match link {
143            MarkdownLink::Inline { link } => Some(link.link),
144            MarkdownLink::Reference { link: MarkdownReferenceLink::Shortcut { text } }
145                if is_intralink_shortcut(text.as_str()) =>
146            {
147                Some(Link::new(text.as_str().to_owned()))
148            }
149            MarkdownLink::Reference { .. } => None,
150        });
151    let reference_links = markdown_reference_link_definition_iterator(&doc.markdown)
152        .items()
153        .map(|link_def| link_def.link);
154
155    link_targets.chain(reference_links).any(|link| IntralinkResolver::is_intralink(&link))
156}
157
158impl<F> DocTransform for DocTransformIntralinks<F>
159where
160    F: Fn(&str),
161{
162    type E = IntralinkError;
163
164    fn transform(&self, doc: &Doc) -> Result<Doc, IntralinkError> {
165        // If there are no intralinks return immediately. No need to run `rustdoc` at all.
166        if !has_intralinks(doc) {
167            return Ok(doc.clone());
168        }
169
170        let strip_links = self.config.strip_links.unwrap_or(false);
171
172        let intralink_resolver: IntralinkResolver<'_> = match strip_links {
173            true => {
174                // Create an empty resolver, since we are going to strip all intralinks.
175                IntralinkResolver::new(self.package_name.as_str(), &self.config.docs_rs)
176            }
177            false => create_intralink_resolver(
178                self.package_name.as_str(),
179                &self.package_target,
180                self.workspace_package.as_deref(),
181                &self.manifest_path,
182                &self.config,
183            )?,
184        };
185
186        let doc = rewrite_links(doc, &intralink_resolver, &self.emit_warning, &self.config);
187
188        Ok(doc)
189    }
190}
191
192fn rewrite_links(
193    doc: &Doc,
194    intralink_resolver: &IntralinkResolver,
195    emit_warning: &impl Fn(&str),
196    config: &IntralinksConfig,
197) -> Doc {
198    let RewriteReferenceLinksResult { doc, reference_links_to_remove } =
199        rewrite_reference_links_definitions(doc, intralink_resolver, emit_warning, config);
200
201    rewrite_markdown_links(
202        &doc,
203        intralink_resolver,
204        emit_warning,
205        config,
206        &reference_links_to_remove,
207    )
208}
209
/// What to do with a link (or reference link definition) found in the document.
enum MarkdownLinkAction {
    /// Point the link at the given target.
    Link(Link),
    /// Emit the link exactly as it was written.
    Preserve,
    /// Drop the link: links are reduced to their text and reference link definitions are removed.
    Strip,
}
215
/// Returns `text` wrapped in a pair of backticks, unless it already starts and ends with one.
///
/// Text shorter than two bytes is always wrapped, so a lone backtick is not mistaken for a
/// matched pair. Backticks in the middle of `text` are left alone.
fn ensure_backticked(text: &str) -> String {
    if text.len() >= 2 && text.starts_with('`') && text.ends_with('`') {
        return text.to_owned();
    }

    let mut wrapped = String::with_capacity(text.len() + 2);

    wrapped.push('`');
    wrapped.push_str(text);
    wrapped.push('`');

    wrapped
}
224
/// Decides whether the text of a shortcut link `[text]` looks like a Rust intralink.
///
/// The text qualifies when any of the following holds:
///
/// - it contains a path separator (`[foo::Bar]`);
/// - it is wrapped in backticks (`` [`Foo`] ``);
/// - it is a bare identifier, i.e. non-empty and made only of alphanumerics and `_` (`[Foo]`).
///
/// The conditions overlap on purpose (backticked text usually wraps an identifier or a path);
/// they are checked one by one for readability.
///
/// The check is intentionally permissive: in non-strip mode a false positive simply fails to
/// resolve and falls through, while in strip mode (where the resolver is empty) this is the only
/// signal available.
fn is_intralink_shortcut(text: &str) -> bool {
    if text.contains("::") {
        return true;
    }

    if text.len() >= 2 && text.starts_with('`') && text.ends_with('`') {
        return true;
    }

    !text.is_empty() && text.chars().all(|c| c == '_' || c.is_alphanumeric())
}
240
241fn resolve_shortcut_intralink(
242    link: &MarkdownReferenceLink,
243    intralink_resolver: &IntralinkResolver,
244    strip_links: bool,
245) -> Option<MarkdownLinkAction> {
246    let MarkdownReferenceLink::Shortcut { text } = link else {
247        return None;
248    };
249
250    if strip_links {
251        return is_intralink_shortcut(text.as_str()).then_some(MarkdownLinkAction::Strip);
252    }
253
254    let candidate = Link::new(text.as_str().to_owned());
255    let url = intralink_resolver.resolve_link(&candidate)?;
256
257    let url = match candidate.link_fragment() {
258        Some(fragment) if !url.contains('#') => format!("{url}#{fragment}"),
259        _ => url.to_owned(),
260    };
261
262    Some(MarkdownLinkAction::Link(url.into()))
263}
264
265fn markdown_link(
266    link: &Link,
267    intralink_resolver: &IntralinkResolver,
268    emit_warning: &impl Fn(&str),
269) -> MarkdownLinkAction {
270    assert!(IntralinkResolver::is_intralink(link));
271
272    match intralink_resolver.resolve_link(link) {
273        None => {
274            emit_warning(&format!("Could not resolve definition of `{}`.", link.symbol()));
275
276            MarkdownLinkAction::Strip
277        }
278        Some(url) => {
279            // For impl items the resolved URL already carries an implicit fragment (e.g.
280            // `#method.foo`), so we drop any user-supplied fragment to avoid emitting a URL with
281            // two `#`s.
282            let url = match link.link_fragment() {
283                Some(fragment) if !url.contains('#') => format!("{url}#{fragment}"),
284                _ => url.to_owned(),
285            };
286
287            MarkdownLinkAction::Link(url.into())
288        }
289    }
290}
291
292fn rewrite_markdown_links(
293    doc: &Doc,
294    intralink_resolver: &IntralinkResolver,
295    emit_warning: &impl Fn(&str),
296    config: &IntralinksConfig,
297    reference_links_to_remove: &HashSet<UniCase<String>>,
298) -> Doc {
299    use crate::utils::ItemOrOther;
300
301    let strip_links = config.strip_links.unwrap_or(false);
302    let mut new_doc = String::with_capacity(doc.as_string().len() + 1024);
303
304    for item_or_other in markdown_link_iterator(&doc.markdown).complete() {
305        match item_or_other {
306            ItemOrOther::Item(MarkdownLink::Inline { link: inline_link }) => {
307                let markdown_link: MarkdownLinkAction =
308                    match IntralinkResolver::is_intralink(&inline_link.link) {
309                        true => match strip_links {
310                            false => {
311                                markdown_link(&inline_link.link, intralink_resolver, emit_warning)
312                            }
313                            true => MarkdownLinkAction::Strip,
314                        },
315                        false => MarkdownLinkAction::Preserve,
316                    };
317
318                match markdown_link {
319                    MarkdownLinkAction::Link(markdown_link) => {
320                        new_doc.push_str(&inline_link.with_link(markdown_link).to_string());
321                    }
322                    MarkdownLinkAction::Preserve => {
323                        new_doc.push_str(&inline_link.to_string());
324                    }
325                    MarkdownLinkAction::Strip => {
326                        new_doc.push_str(&inline_link.text);
327                    }
328                }
329            }
330            ItemOrOther::Item(MarkdownLink::Reference { link }) => {
331                if reference_links_to_remove.contains(link.label()) {
332                    new_doc.push_str(link.text());
333                } else if let Some(action) =
334                    resolve_shortcut_intralink(&link, intralink_resolver, strip_links)
335                {
336                    // Shortcut intralinks render as code in rustdoc HTML; preserve that by
337                    // ensuring the link text is backticked in the README output.
338                    let backticked = ensure_backticked(link.text());
339
340                    match action {
341                        MarkdownLinkAction::Link(resolved) => {
342                            let inline = MarkdownInlineLink { text: backticked, link: resolved };
343
344                            new_doc.push_str(&inline.to_string());
345                        }
346                        MarkdownLinkAction::Strip => new_doc.push_str(&backticked),
347                        MarkdownLinkAction::Preserve => new_doc.push_str(&link.to_string()),
348                    }
349                } else {
350                    new_doc.push_str(&link.to_string());
351                }
352            }
353            ItemOrOther::Other(other) => {
354                new_doc.push_str(other);
355            }
356        }
357    }
358
359    Doc::from_str(new_doc)
360}
361
/// Outcome of `rewrite_reference_links_definitions`.
struct RewriteReferenceLinksResult {
    /// The document after its reference link definitions have been rewritten.
    doc: Doc,
    /// Labels (compared through `UniCase`) of the reference link definitions that were removed
    /// from the document. Links that use one of these labels must be reduced to their text.
    reference_links_to_remove: HashSet<UniCase<String>>,
}
366
367fn rewrite_reference_links_definitions(
368    doc: &Doc,
369    intralink_resolver: &IntralinkResolver,
370    emit_warning: &impl Fn(&str),
371    config: &IntralinksConfig,
372) -> RewriteReferenceLinksResult {
373    use crate::utils::ItemOrOther;
374    let mut reference_links_to_remove = HashSet::new();
375    let mut new_doc = String::with_capacity(doc.as_string().len() + 1024);
376    let mut skip_next_newline = false;
377    let strip_links = config.strip_links.unwrap_or(false);
378
379    let iter = markdown_reference_link_definition_iterator(&doc.markdown);
380
381    for item_or_other in iter.complete() {
382        match item_or_other {
383            ItemOrOther::Item(link_ref_def) => {
384                let markdown_link: MarkdownLinkAction =
385                    match IntralinkResolver::is_intralink(&link_ref_def.link) {
386                        true => match strip_links {
387                            false => {
388                                markdown_link(&link_ref_def.link, intralink_resolver, emit_warning)
389                            }
390                            true => MarkdownLinkAction::Strip,
391                        },
392                        false => MarkdownLinkAction::Preserve,
393                    };
394
395                match markdown_link {
396                    MarkdownLinkAction::Link(link) => {
397                        new_doc.push_str(&link_ref_def.with_link(link).to_string());
398                    }
399                    MarkdownLinkAction::Preserve => {
400                        new_doc.push_str(&link_ref_def.to_string());
401                    }
402                    MarkdownLinkAction::Strip => {
403                        // Do not emit anything to new_doc.
404                        reference_links_to_remove.insert(link_ref_def.label);
405                        skip_next_newline = true;
406                    }
407                }
408            }
409            ItemOrOther::Other(other) => {
410                let other = match skip_next_newline {
411                    true => {
412                        skip_next_newline = false;
413                        let next_index = other
414                            .chars()
415                            .enumerate()
416                            .skip_while(|(_, c)| c.is_whitespace() && *c != '\n')
417                            .skip(1)
418                            .map(|(i, _)| i)
419                            .next();
420
421                        next_index.and_then(|i| other.get(i..)).unwrap_or("")
422                    }
423                    false => other,
424                };
425                new_doc.push_str(other);
426            }
427        }
428    }
429
430    RewriteReferenceLinksResult { doc: Doc::from_str(new_doc), reference_links_to_remove }
431}
432
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn test_ensure_backticked() {
        let cases = [
            ("Foo", "`Foo`"),
            ("foo::Bar", "`foo::Bar`"),
            ("`Foo`", "`Foo`"),
            ("`foo::Bar`", "`foo::Bar`"),
            ("", "``"),
            // A lone backtick is not a matched pair.
            ("`", "```"),
            // Already-paired empty backticks stay as is.
            ("``", "``"),
            // Inner backticks are not stripped.
            ("a`b", "`a`b`"),
        ];

        for (input, expected) in cases {
            assert_eq!(ensure_backticked(input), expected);
        }
    }

    #[test]
    fn test_is_intralink_shortcut() {
        let accepted = [
            // Bare identifiers.
            "Foo",
            "foo",
            "_foo",
            "Foo123",
            // Path-separated forms.
            "foo::Bar",
            "crate::foo::Bar",
            "a b::c", // whitespace ok if `::` is present.
            // Backticked forms.
            "`Foo`",
            "`foo::Bar`",
            "`Foo()`",
            "`Foo!`",
        ];

        for text in accepted {
            assert!(is_intralink_shortcut(text), "expected {:?} to be accepted", text);
        }

        // Rejected: not intralink-shaped.
        let rejected = [
            "some text",
            "Foo!",
            "Foo()",
            "",
            "`Foo", // unmatched backtick, no `::`, has backtick.
        ];

        for text in rejected {
            assert!(!is_intralink_shortcut(text), "expected {:?} to be rejected", text);
        }
    }
}
479}