cargo_docs_md/generator/
doc_links.rs

1//! Intra-doc link processing for documentation generation.
2//!
3//! This module provides [`DocLinkProcessor`] which transforms rustdoc
4//! intra-doc link syntax into proper markdown links.
5//!
6//! # Processing Pipeline
7//! The processor applies transformations in this order:
8//! 1. Strip markdown reference definitions
9//! 2. Unhide rustdoc hidden lines in code blocks
10//! 3. Process reference-style links `[text][`ref`]`
11//! 4. Process path reference links `[text][crate::path]`
12//! 5. Process method links `[Type::method]`
13//! 6. Process backtick links `[`Name`]`
14//! 7. Process plain links `[name]`
15//! 8. Convert HTML-style rustdoc links
16//! 9. Clean up blank lines
17//!
18//! Links inside code blocks are protected from transformation.
19
20use std::collections::HashMap;
21use std::sync::LazyLock;
22
23use regex::Regex;
24use rustdoc_types::{Crate, Id, ItemKind};
25
26use crate::linker::{item_has_anchor, LinkRegistry};
27
28// =============================================================================
29// Static Regex Patterns (compiled once, reused everywhere)
30// =============================================================================
31
32/// Regex for HTML-style rustdoc links.
33/// Matches: `(struct.Name.html)` or `(enum.Name.html#method.foo)`
34static HTML_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
35    Regex::new(concat!(
36        r"\((struct|enum|trait|fn|type|macro|constant|mod)\.",
37        r"([A-Za-z_][A-Za-z0-9_]*)\.html",
38        r"(?:#([a-z]+)\.([A-Za-z_][A-Za-z0-9_]*))?\)",
39    ))
40    .unwrap()
41});
42
43/// Regex for path-style reference links.
44///
45/// Matches: `[display][crate::path::Item]`
46///
47/// Used for rustdoc's reference-style intra-doc links where the display text
48/// differs from the path reference.
49///
50/// # Capture Groups
51/// - Group 1: Display text (anything except `]`)
52/// - Group 2: Rust path with `::` separators (e.g., `crate::module::Item`)
53///
54/// # Pattern Breakdown
55/// ```text
56/// \[([^\]]+)\]              # [display text] - capture non-] chars
57/// \[                        # Opening bracket for reference
58/// ([a-zA-Z_][a-zA-Z0-9_]*   # First path segment (valid Rust identifier)
59/// (?:::[a-zA-Z_][a-zA-Z0-9_]*)+  # One or more ::segment pairs (requires at least one ::)
60/// )\]                       # Close capture and bracket
61/// ```
62///
63/// # Note
64/// The pattern requires at least one `::` separator, so it won't match
65/// single identifiers like `[text][Name]`.
66static PATH_REF_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
67    Regex::new(r"\[([^\]]+)\]\[([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z_][a-zA-Z0-9_]*)+)\]").unwrap()
68});
69
70/// Regex for backtick code links.
71///
72/// Matches: `` [`Name`] `` (the most common intra-doc link format)
73///
74/// This is the primary pattern for rustdoc intra-doc links. The backticks
75/// indicate the link should be rendered as inline code.
76///
77/// # Capture Groups
78/// - Group 1: The link text inside backticks (e.g., `Name`, `path::Item`)
79///
80/// # Pattern Breakdown
81/// ```text
82/// \[`        # Literal "[`" - opening bracket and backtick
83/// ([^`]+)    # Capture: one or more non-backtick characters
84/// `\]        # Literal "`]" - closing backtick and bracket
85/// ```
86///
87/// # Processing Note
88/// The code checks if the match is followed by `(` to avoid double-processing
89/// already-converted markdown links like `` [`Name`](url) ``.
90static BACKTICK_LINK_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`]+)`\]").unwrap());
91
92/// Regex for reference-style links with backticks.
93///
94/// Matches: `` [display text][`ref`] ``
95///
96/// This pattern handles rustdoc reference-style links where custom display
97/// text links to a backtick-wrapped reference.
98///
99/// # Capture Groups
100/// - Group 1: Display text (what the user sees)
101/// - Group 2: Reference text inside backticks (the actual link target)
102///
103/// # Pattern Breakdown
104/// ```text
105/// \[([^\]]+)\]   # [display text] - capture anything except ]
106/// \[`            # Opening "[`" for the reference
107/// ([^`]+)        # Capture: reference name (non-backtick chars)
108/// `\]            # Closing "`]"
109/// ```
110///
111/// # Example
112/// `` [custom text][`HashMap`] `` renders as "custom text" linking to `HashMap`.
113static REFERENCE_LINK_RE: LazyLock<Regex> =
114    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[`([^`]+)`\]").unwrap());
115
116/// Regex for markdown reference definitions.
117///
118/// Matches: `` [`Name`]: path::to::item `` at line start
119///
120/// These are markdown reference definition lines that rustdoc uses internally.
121/// We strip these from output since intra-doc links are resolved directly.
122///
123/// # Pattern Breakdown
124/// ```text
125/// (?m)       # Multi-line mode: ^ and $ match line boundaries
126/// ^          # Start of line
127/// \s*        # Optional leading whitespace
128/// \[`[^`]+`\]  # Backtick link syntax (not captured)
129/// :          # Literal colon separator
130/// \s*        # Optional whitespace after colon
131/// \S+        # The target path (non-whitespace chars)
132/// \s*        # Optional trailing whitespace
133/// $          # End of line
134/// ```
135///
136/// # Note
137/// This pattern doesn't capture groups because it's used with `replace_all`
138/// to remove entire lines.
139///
140/// Matches various reference definition formats:
141/// - `[`Foo`]: crate::Foo` (backtick style)
142/// - `[name]: crate::path` (plain style)
143/// - `[name](#anchor): crate::path` (with anchor)
144static REFERENCE_DEF_RE: LazyLock<Regex> =
145    LazyLock::new(|| Regex::new(r"(?m)^\s*\[[^\]]+\](?:\([^)]*\))?:\s*\S+\s*$").unwrap());
146
147/// Regex for plain identifier links.
148///
149/// Matches: `[name]` where name is a valid Rust identifier
150///
151/// This handles the simplest intra-doc link format without backticks.
152/// Used less frequently than backtick links but still valid rustdoc syntax.
153///
154/// # Capture Groups
155/// - Group 1: The identifier name
156///
157/// # Pattern Breakdown
158/// ```text
159/// \[                      # Opening bracket
160/// ([a-zA-Z_]              # Capture start: letter or underscore (Rust identifier rules)
161/// [a-zA-Z0-9_]*)          # Followed by alphanumeric or underscore
162/// \]                      # Closing bracket
163/// ```
164///
165/// # Processing Note
166/// The code checks if the match is followed by `(` or `[` to avoid
167/// false positives on existing markdown links or reference-style links.
168/// Also only processes if the identifier exists in `item_links`.
169static PLAIN_LINK_RE: LazyLock<Regex> =
170    LazyLock::new(|| Regex::new(r"\[([a-zA-Z_][a-zA-Z0-9_]*)\]").unwrap());
171
172/// Regex for method/associated item links.
173///
174/// Matches: `` [`Type::method`] `` or `` [`mod::Type::CONST`] ``
175///
176/// Handles links to methods, associated functions, constants, and other
177/// items accessed via `::` path notation. This includes both type-level
178/// paths (`Type::method`) and module-level paths (`mod::Type::CONST`).
179///
180/// # Capture Groups
181/// - Group 1: The full path including `::` separators
182///
183/// # Pattern Breakdown
184/// ```text
185/// \[`                              # Opening "[`"
186/// (                                # Start capture group
187///   [A-Za-z_][A-Za-z0-9_]*         # First segment (Rust identifier)
188///   (?:::[A-Za-z_][A-Za-z0-9_]*)+  # One or more ::segment pairs
189/// )                                # End capture group
190/// `\]                              # Closing "`]"
191/// ```
192///
193/// # Examples Matched
194/// - `` [`HashMap::new`] `` - associated function
195/// - `` [`Option::Some`] `` - enum variant
196/// - `` [`Iterator::next`] `` - trait method
197/// - `` [`std::vec::Vec`] `` - fully qualified path
198///
199/// # Processing Note
200/// The last segment after `::` is used as the anchor (lowercased).
201/// The type path before the last `::` is used to find the target file.
202static METHOD_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
203    Regex::new(r"\[`([A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)+)`\]").unwrap()
204});
205
206// =============================================================================
207// Standalone Functions
208// =============================================================================
209
210/// Convert HTML-style rustdoc links to markdown anchors.
211///
212/// Transforms links like:
213/// - `(enum.NumberPrefix.html)` -> `(#numberprefix)`
214/// - `(struct.Foo.html#method.bar)` -> removes the link (methods don't have anchors)
215///
216/// This is useful for multi-crate documentation where the full processor
217/// context may not be available.
218#[must_use]
219pub fn convert_html_links(docs: &str) -> String {
220    replace_with_regex(docs, &HTML_LINK_RE, |caps| {
221        let item_name = &caps[2];
222
223        // If there's a method/variant anchor part, remove the link entirely
224        // since methods don't have individual headings
225        if caps.get(4).is_some() {
226            // Return empty to remove the (link) part, keeping just the display text
227            String::new()
228        } else {
229            // Type-level anchor should exist
230            format!("(#{})", item_name.to_lowercase())
231        }
232    })
233}
234
/// Strip a leading title that duplicates the generated heading.
///
/// Some crate/module docs start with `# title`, which repeats the generated
/// `# Crate 'name'` or `# Module 'name'` heading.
///
/// The title and the item name are compared after normalization: backticks
/// are removed from the title, spaces and hyphens become underscores, and
/// both sides are lowercased.
///
/// # Arguments
///
/// * `docs` - The documentation string to process
/// * `item_name` - The name of the crate or module being documented
///
/// # Returns
///
/// A sub-slice of `docs` that starts after the title line and the line
/// breaks following it (both `\n` and `\r\n` endings are handled) when the
/// title matches `item_name`; otherwise `docs` unchanged.
#[must_use]
pub fn strip_duplicate_title<'a>(docs: &'a str, item_name: &str) -> &'a str {
    let Some(first_line) = docs.lines().next() else {
        return docs;
    };

    let Some(title) = first_line.strip_prefix("# ") else {
        return docs;
    };

    // Normalize the title:
    // - Remove backticks (e.g., `clap_builder` -> clap_builder)
    // - Replace spaces with underscores (e.g., "Serde JSON" -> "serde_json")
    // - Replace hyphens with underscores (e.g., "my-crate" -> "my_crate")
    // - Lowercase for comparison
    let normalized_title = title
        .trim()
        .replace('`', "")
        .replace(['-', ' '], "_")
        .to_lowercase();

    let normalized_name = item_name.replace('-', "_").to_lowercase();

    if normalized_title == normalized_name {
        // `lines()` strips the terminator ("\n" or "\r\n") from `first_line`,
        // so the remainder starts with it. Trim both characters; trimming
        // only '\n' would leave a stray "\r\n" in front of CRLF docs.
        docs[first_line.len()..].trim_start_matches(['\r', '\n'])
    } else {
        docs
    }
}
279
280/// Strip markdown reference definition lines.
281///
282/// Removes lines like `[`Name`]: path::to::item` which are no longer needed
283/// after intra-doc links are processed.
284pub fn strip_reference_definitions(docs: &str) -> String {
285    REFERENCE_DEF_RE.replace_all(docs, "").to_string()
286}
287
/// Unhide rustdoc hidden lines in code blocks and add language identifiers.
///
/// Two transformations are applied to fenced code blocks:
/// 1. Inside **Rust** code blocks, lines starting with `# ` are hidden by
///    rustdoc but still compiled. The prefix is removed so the full example
///    is shown; a line that is just `#` becomes an empty line.
/// 2. Bare opening fences (exactly ` ``` ` or `~~~`) get `rust` appended,
///    since an untagged doc example is Rust code.
///
/// Blocks tagged with another language (`sh`, `toml`, `text`, ...) are
/// copied through untouched, so `# ` comments in them survive.
///
/// Line endings are normalized to `\n`. The result ends with a newline only
/// if `docs` did.
#[must_use]
pub fn unhide_code_lines(docs: &str) -> String {
    let mut result = String::with_capacity(docs.len());
    // `Some((fence, is_rust))` while inside a fenced block.
    let mut open_block: Option<(&str, bool)> = None;

    for line in docs.lines() {
        let trimmed = line.trim_start();
        let leading_ws = &line[..line.len() - trimmed.len()];

        // Track code block boundaries
        if let Some(marker) = detect_fence(trimmed) {
            match open_block {
                Some((open, _)) if trimmed.starts_with(open) => {
                    // Closing fence
                    open_block = None;
                    result.push_str(line);
                }
                Some(_) => {
                    // Fence of the other style inside a block - pass through
                    result.push_str(line);
                }
                None => {
                    // Opening fence: remember its style and whether the
                    // info string after the fence characters means Rust.
                    let fence_char = if marker == "```" { '`' } else { '~' };
                    let info = trimmed.trim_start_matches(fence_char);
                    open_block = Some((marker, is_rust_info_string(info)));

                    // Add `rust` to bare fences (``` or ~~~)
                    if trimmed == "```" || trimmed == "~~~" {
                        result.push_str(leading_ws);
                        result.push_str(trimmed);
                        result.push_str("rust");
                    } else {
                        result.push_str(line);
                    }
                }
            }
            result.push('\n');
            continue;
        }

        if matches!(open_block, Some((_, true))) {
            if trimmed == "#" {
                // Just "#" becomes an empty line (newline added below)
            } else if let Some(rest) = trimmed.strip_prefix("# ") {
                // "# code" becomes "code", keeping the indentation
                result.push_str(leading_ws);
                result.push_str(rest);
            } else {
                result.push_str(line);
            }
        } else {
            // Prose, or a non-Rust code block: copy verbatim
            result.push_str(line);
        }
        result.push('\n');
    }

    // Remove trailing newline if original didn't have one
    if !docs.ends_with('\n') && result.ends_with('\n') {
        result.pop();
    }

    result
}

/// Decide whether a fence info string denotes a Rust code block.
///
/// The string is split on commas, spaces and tabs. The block counts as Rust
/// when it contains the `rust` token, or when every token is a rustdoc
/// code-block attribute (`no_run`, `ignore`, `should_panic`, `edition2021`,
/// ...). An empty info string therefore counts as Rust.
fn is_rust_info_string(info: &str) -> bool {
    let mut only_rustdoc_attributes = true;

    for token in info.split([',', ' ', '\t']).filter(|t| !t.is_empty()) {
        if token == "rust" {
            return true;
        }

        let is_error_code = token.len() == 5
            && token.starts_with('E')
            && token[1..].bytes().all(|b| b.is_ascii_digit());

        let is_attribute = matches!(
            token,
            "should_panic"
                | "no_run"
                | "ignore"
                | "compile_fail"
                | "test_harness"
                | "standalone"
                | "standalone_crate"
        ) || token.starts_with("ignore-")
            || token.starts_with("edition")
            || is_error_code;

        only_rustdoc_attributes &= is_attribute;
    }

    only_rustdoc_attributes
}

/// Detect a code fence and return the fence string.
///
/// Returns `` "```" `` or `"~~~"` when `trimmed` starts with that marker,
/// and `None` otherwise. The caller is expected to have removed leading
/// whitespace already.
fn detect_fence(trimmed: &str) -> Option<&'static str> {
    if trimmed.starts_with("```") {
        Some("```")
    } else if trimmed.starts_with("~~~") {
        Some("~~~")
    } else {
        None
    }
}
369
370/// Convert path-style reference links to inline code.
371///
372/// Transforms: `[``ProgressTracker``][crate::style::ProgressTracker]`
373/// Into: `` `ProgressTracker` ``
374///
375/// Without full link resolution context, we can't create valid anchors,
376/// so we preserve the display text as inline code.
377#[must_use]
378pub fn convert_path_reference_links(docs: &str) -> String {
379    replace_with_regex(docs, &PATH_REF_LINK_RE, |caps| {
380        let display_text = &caps[1];
381        // Don't double-wrap in backticks
382        if display_text.starts_with('`') && display_text.ends_with('`') {
383            display_text.to_string()
384        } else {
385            format!("`{display_text}`")
386        }
387    })
388}
389
390// =============================================================================
391// DocLinkProcessor
392// =============================================================================
393
394/// Processes doc comments to resolve intra-doc links to markdown links.
395///
396/// Rustdoc JSON includes a `links` field on each Item that maps intra-doc
397/// link text to item IDs. This processor uses that map along with the
398/// `LinkRegistry` to convert these to relative markdown links.
399///
400/// # Supported Patterns
401///
402/// - `` [`Name`] `` - Backtick code links (most common)
403/// - `` [`path::to::Item`] `` - Qualified path links
404/// - `` [`Type::method`] `` - Method/associated item links
405/// - `[name]` - Plain identifier links
406/// - `[text][`ref`]` - Reference-style links
407/// - `[text][crate::path]` - Path reference links
408///
409/// # External Crate Links
410///
411/// Items from external crates are linked to docs.rs when possible.
412///
413/// # Code Block Protection
414///
415/// Links inside fenced code blocks are not processed.
416pub struct DocLinkProcessor<'a> {
417    /// The crate being documented (for looking up items).
418    krate: &'a Crate,
419
420    /// Registry mapping IDs to file paths.
421    link_registry: &'a LinkRegistry,
422
423    /// The current file path (for relative link calculation).
424    current_file: &'a str,
425
426    /// Index mapping item names to their IDs for fast lookup.
427    /// Built from `krate.paths` at construction time.
428    path_name_index: HashMap<&'a str, Vec<Id>>,
429}
430
431impl<'a> DocLinkProcessor<'a> {
432    /// Create a new processor for the given context.
433    #[must_use]
434    pub fn new(krate: &'a Crate, link_registry: &'a LinkRegistry, current_file: &'a str) -> Self {
435        // Build path name index for O(1) lookups
436        let mut path_name_index: HashMap<&'a str, Vec<Id>> = HashMap::new();
437        for (id, path_info) in &krate.paths {
438            if let Some(name) = path_info.path.last() {
439                path_name_index.entry(name.as_str()).or_default().push(*id);
440            }
441        }
442
443        // Sort each Vec by full path for deterministic resolution order
444        for ids in path_name_index.values_mut() {
445            ids.sort_by(|a, b| {
446                let path_a = krate.paths.get(a).map(|p| p.path.join("::"));
447                let path_b = krate.paths.get(b).map(|p| p.path.join("::"));
448                path_a.cmp(&path_b)
449            });
450        }
451
452        Self {
453            krate,
454            link_registry,
455            current_file,
456            path_name_index,
457        }
458    }
459
460    /// Process a doc string and resolve all intra-doc links.
461    ///
462    /// Uses the item's `links` map to resolve link text to IDs,
463    /// then uses `LinkRegistry` to convert IDs to relative paths.
464    #[must_use]
465    pub fn process(&self, docs: &str, item_links: &HashMap<String, Id>) -> String {
466        // Step 1: Strip reference definitions first
467        let stripped = strip_reference_definitions(docs);
468
469        // Step 2: Unhide rustdoc hidden lines in code blocks and add `rust` to bare fences
470        let unhidden = unhide_code_lines(&stripped);
471
472        // Step 3: Process all link types (with code block protection)
473        let processed = self.process_links_protected(&unhidden, item_links);
474
475        // Step 4: Clean up blank lines
476        Self::clean_blank_lines(&processed)
477    }
478
479    /// Process links while protecting code block contents.
480    fn process_links_protected(&self, docs: &str, item_links: &HashMap<String, Id>) -> String {
481        let mut result = String::with_capacity(docs.len());
482        let mut current_pos = 0;
483        let _bytes = docs.as_bytes();
484
485        // Track code block state
486        let mut in_code_block = false;
487        let mut fence: Option<&str> = None;
488
489        for line in docs.lines() {
490            let line_end = current_pos + line.len();
491
492            // Check for code fence
493            let trimmed = line.trim_start();
494            if let Some(f) = detect_fence(trimmed) {
495                if in_code_block {
496                    // Check if this closes the current block
497                    if let Some(open_fence) = fence
498                        && trimmed.starts_with(open_fence)
499                    {
500                        in_code_block = false;
501                        fence = None;
502                    }
503                } else {
504                    in_code_block = true;
505                    fence = Some(f);
506                }
507
508                result.push_str(line);
509            } else if in_code_block {
510                // Inside code block - don't process
511                result.push_str(line);
512            } else {
513                // Outside code block - process links
514                let processed = self.process_line(line, item_links);
515                result.push_str(&processed);
516            }
517
518            // Add newline if not at end
519            current_pos = line_end;
520            if current_pos < docs.len() {
521                result.push('\n');
522                current_pos += 1; // Skip the newline character
523            }
524        }
525
526        result
527    }
528
529    /// Process a single line for all link types.
530    fn process_line(&self, line: &str, item_links: &HashMap<String, Id>) -> String {
531        // Skip lines that look like reference definitions (backup check)
532        if line.trim_start().starts_with("[`") && line.contains("]:") {
533            return String::new();
534        }
535
536        // Process in order of specificity (most specific patterns first)
537        let s = self.process_reference_links(line, item_links);
538        let s = self.process_path_reference_links(&s, item_links);
539        let s = self.process_method_links(&s, item_links);
540        let s = self.process_backtick_links(&s, item_links);
541        let s = self.process_plain_links(&s, item_links);
542
543        self.process_html_links_with_context(&s, item_links)
544    }
545
546    /// Process reference-style links `[display text][`Span`]`.
547    fn process_reference_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
548        replace_with_regex(text, &REFERENCE_LINK_RE, |caps| {
549            let display_text = &caps[1];
550            let ref_key = &caps[2];
551
552            self.resolve_to_url(ref_key, item_links).map_or_else(
553                || caps[0].to_string(),
554                |url| format!("[{display_text}]({url})"),
555            )
556        })
557    }
558
559    /// Process path reference links `[text][crate::path::Item]`.
560    fn process_path_reference_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
561        replace_with_regex(text, &PATH_REF_LINK_RE, |caps| {
562            let display_text = &caps[1];
563            let rust_path = &caps[2];
564
565            self.resolve_to_url(rust_path, item_links).map_or_else(
566                // Can't resolve - keep as inline code without broken anchor
567                || {
568                    // Don't double-wrap in backticks
569                    if display_text.starts_with('`') && display_text.ends_with('`') {
570                        display_text.to_string()
571                    } else {
572                        format!("`{display_text}`")
573                    }
574                },
575                |url| format!("[{display_text}]({url})"),
576            )
577        })
578    }
579
580    /// Process method links `[``Type::method``]`.
581    fn process_method_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
582        replace_with_regex_checked(text, &METHOD_LINK_RE, |caps, rest| {
583            // Skip if already a markdown link
584            if rest.starts_with('(') {
585                return caps[0].to_string();
586            }
587
588            let full_path = &caps[1];
589            if let Some(last_sep) = full_path.rfind("::") {
590                let type_part = &full_path[..last_sep];
591                let method_part = &full_path[last_sep + 2..];
592
593                if let Some(link) = self.resolve_method_link(type_part, method_part, item_links) {
594                    return link;
595                }
596            }
597            caps[0].to_string()
598        })
599    }
600
601    /// Process backtick links `[`Name`]`.
602    fn process_backtick_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
603        replace_with_regex_checked(text, &BACKTICK_LINK_RE, |caps, rest| {
604            // Skip if already a markdown link
605            if rest.starts_with('(') {
606                return caps[0].to_string();
607            }
608
609            let link_text = &caps[1];
610            self.resolve_link(link_text, item_links)
611        })
612    }
613
614    /// Process plain links `[name]`.
615    fn process_plain_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
616        replace_with_regex_checked(text, &PLAIN_LINK_RE, |caps, rest| {
617            // Skip if already a markdown link
618            if matches!(rest.chars().next(), Some('(' | '[')) {
619                return caps[0].to_string();
620            }
621
622            let link_text = &caps[1];
623
624            // Only process if it's in item_links (avoid false positives)
625            if let Some(id) = item_links.get(link_text)
626                && let Some(md_link) = self.create_link_for_id(*id, link_text)
627            {
628                return md_link;
629            }
630            caps[0].to_string()
631        })
632    }
633
634    /// Process HTML-style rustdoc links with context awareness.
635    ///
636    /// Instead of blindly converting all HTML links to local anchors,
637    /// this method checks if the item actually exists on the current page.
638    /// If not, it tries to resolve to docs.rs or removes the broken link.
639    fn process_html_links_with_context(
640        &self,
641        text: &str,
642        item_links: &HashMap<String, Id>,
643    ) -> String {
644        replace_with_regex(text, &HTML_LINK_RE, |caps| {
645            let item_kind = &caps[1]; // struct, enum, trait, etc.
646            let item_name = &caps[2];
647
648            // If there's a method/variant anchor part, remove the link entirely
649            // since methods don't have individual headings
650            if caps.get(4).is_some() {
651                return String::new();
652            }
653
654            // Try to find this item in our link resolution
655            if let Some(url) = self.resolve_html_link_to_url(item_name, item_kind, item_links) {
656                return format!("({url})");
657            }
658
659            // Fallback: remove the link part entirely (keep just the display text)
660            // This is better than creating a broken #anchor
661            String::new()
662        })
663    }
664
665    /// Try to resolve an HTML-style link to a proper URL.
666    ///
667    /// Returns a URL if the item can be resolved (either locally or to docs.rs),
668    /// or None if the item cannot be found.
669    fn resolve_html_link_to_url(
670        &self,
671        item_name: &str,
672        item_kind: &str,
673        item_links: &HashMap<String, Id>,
674    ) -> Option<String> {
675        // Strategy 1: Check if item is in item_links
676        if let Some(id) = item_links.get(item_name) {
677            // Check if it's on the current page
678            if let Some(path) = self.link_registry.get_path(*id) {
679                if path == self.current_file {
680                    // Only create anchor if item has a heading
681                    if let Some(path_info) = self.krate.paths.get(id)
682                        && item_has_anchor(path_info.kind)
683                    {
684                        return Some(format!("#{}", item_name.to_lowercase()));
685                    }
686                    // Item on page but no anchor - link to page without anchor
687                    return Some(String::new());
688                }
689                // Item is in another file
690                let relative = LinkRegistry::compute_relative_path(self.current_file, path);
691                return Some(relative);
692            }
693
694            // Try docs.rs for external crates
695            if let Some(path_info) = self.krate.paths.get(id)
696                && path_info.crate_id != 0
697            {
698                return Self::get_docs_rs_url(path_info);
699            }
700        }
701
702        // Strategy 2: Search path_name_index for the item name
703        if let Some(ids) = self.path_name_index.get(item_name) {
704            for id in ids {
705                if let Some(path) = self.link_registry.get_path(*id) {
706                    if path == self.current_file {
707                        // Only create anchor if item has a heading
708                        if let Some(path_info) = self.krate.paths.get(id)
709                            && item_has_anchor(path_info.kind)
710                        {
711                            return Some(format!("#{}", item_name.to_lowercase()));
712                        }
713                        // Item on page but no anchor - link to page without anchor
714                        return Some(String::new());
715                    }
716                    let relative = LinkRegistry::compute_relative_path(self.current_file, path);
717                    return Some(relative);
718                }
719
720                // Try docs.rs
721                if let Some(path_info) = self.krate.paths.get(id)
722                    && path_info.crate_id != 0
723                {
724                    return Self::get_docs_rs_url(path_info);
725                }
726            }
727        }
728
729        // Strategy 3: Search krate.paths for external items by name
730        // Collect all matches and pick the shortest path (most specific) for determinism
731        let mut matches: Vec<_> = self
732            .krate
733            .paths
734            .values()
735            .filter(|path_info| {
736                path_info.crate_id != 0
737                    && path_info.path.last().is_some_and(|name| name == item_name)
738                    && Self::kind_matches(item_kind, path_info.kind)
739            })
740            .collect();
741
742        // Sort by full path for deterministic selection
743        matches.sort_by(|a, b| a.path.join("::").cmp(&b.path.join("::")));
744
745        matches.first().and_then(|path_info| Self::get_docs_rs_url(path_info))
746    }
747
748    /// Check if the HTML link kind matches the rustdoc item kind.
749    fn kind_matches(html_kind: &str, item_kind: ItemKind) -> bool {
750        match html_kind {
751            "struct" => item_kind == ItemKind::Struct,
752            "enum" => item_kind == ItemKind::Enum,
753            "trait" => item_kind == ItemKind::Trait,
754            "fn" => item_kind == ItemKind::Function,
755            "type" => item_kind == ItemKind::TypeAlias,
756            "macro" => item_kind == ItemKind::Macro,
757            "constant" => item_kind == ItemKind::Constant,
758            "mod" => item_kind == ItemKind::Module,
759            _ => false,
760        }
761    }
762
763    /// Clean up multiple consecutive blank lines.
764    fn clean_blank_lines(docs: &str) -> String {
765        let mut result = String::with_capacity(docs.len());
766        let mut prev_blank = false;
767
768        for line in docs.lines() {
769            let is_blank = line.trim().is_empty();
770            if is_blank && prev_blank {
771                continue;
772            }
773            if !result.is_empty() {
774                result.push('\n');
775            }
776            result.push_str(line);
777            prev_blank = is_blank;
778        }
779
780        result.trim_end().to_string()
781    }
782
783    // =========================================================================
784    // Resolution Methods
785    // =========================================================================
786
787    /// Resolve a link reference to a URL.
788    fn resolve_to_url(&self, link_text: &str, item_links: &HashMap<String, Id>) -> Option<String> {
789        // Strategy 1: Exact match in item_links
790        if let Some(id) = item_links.get(link_text)
791            && let Some(url) = self.get_url_for_id(*id)
792        {
793            return Some(url);
794        }
795
796        // Strategy 2: Short name match in item_links
797        let short_name = link_text.split("::").last().unwrap_or(link_text);
798
799        for (key, id) in item_links {
800            if key.split("::").last() == Some(short_name)
801                && let Some(url) = self.get_url_for_id(*id)
802            {
803                return Some(url);
804            }
805        }
806
807        // Strategy 3: Use path name index
808        if let Some(ids) = self.path_name_index.get(short_name) {
809            for id in ids {
810                if let Some(url) = self.get_url_for_id(*id) {
811                    return Some(url);
812                }
813            }
814        }
815
816        None
817    }
818
819    /// Get the URL for an ID (local or docs.rs).
820    fn get_url_for_id(&self, id: Id) -> Option<String> {
821        // Try local first
822        if let Some(path) = self.link_registry.get_path(id) {
823            let relative = LinkRegistry::compute_relative_path(self.current_file, path);
824            return Some(relative);
825        }
826
827        // Try docs.rs for external crates
828        if let Some(path_info) = self.krate.paths.get(&id)
829            && path_info.crate_id != 0
830        {
831            return Self::get_docs_rs_url(path_info);
832        }
833
834        None
835    }
836
837    /// Get docs.rs URL for an external crate item.
838    fn get_docs_rs_url(path_info: &rustdoc_types::ItemSummary) -> Option<String> {
839        let path = &path_info.path;
840        if path.is_empty() {
841            return None;
842        }
843
844        let crate_name = &path[0];
845
846        // Handle module URLs specially
847        if path_info.kind == ItemKind::Module {
848            if path.len() == 1 {
849                return Some(format!("https://docs.rs/{crate_name}/latest/{crate_name}/"));
850            }
851
852            let module_path = path[1..].join("/");
853
854            return Some(format!(
855                "https://docs.rs/{crate_name}/latest/{crate_name}/{module_path}/index.html"
856            ));
857        }
858
859        let item_path = path[1..].join("/");
860        let type_prefix = match path_info.kind {
861            ItemKind::Struct => "struct",
862            ItemKind::Enum => "enum",
863            ItemKind::Trait => "trait",
864            ItemKind::Function => "fn",
865            ItemKind::Constant => "constant",
866            ItemKind::TypeAlias => "type",
867            ItemKind::Macro => "macro",
868            _ => "index",
869        };
870
871        let item_name = path.last().unwrap_or(crate_name);
872
873        if item_path.is_empty() {
874            Some(format!("https://docs.rs/{crate_name}/latest/{crate_name}/"))
875        } else {
876            // Remove last segment from path for the directory
877            let dir_path = if path.len() > 2 {
878                path[1..path.len() - 1].join("/")
879            } else {
880                String::new()
881            };
882
883            if dir_path.is_empty() {
884                Some(format!(
885                    "https://docs.rs/{crate_name}/latest/{crate_name}/{type_prefix}.{item_name}.html"
886                ))
887            } else {
888                Some(format!(
889                    "https://docs.rs/{crate_name}/latest/{crate_name}/{dir_path}/{type_prefix}.{item_name}.html"
890                ))
891            }
892        }
893    }
894
895    /// Resolve a method link to a markdown link (without method anchor).
896    ///
897    /// Links to the type's page since methods don't have individual headings
898    /// in the generated markdown.
899    fn resolve_method_link(
900        &self,
901        type_name: &str,
902        method_name: &str,
903        item_links: &HashMap<String, Id>,
904    ) -> Option<String> {
905        // Try to find the type
906        let type_id = item_links.get(type_name).or_else(|| {
907            let short_type = type_name.split("::").last().unwrap_or(type_name);
908            item_links
909                .iter()
910                .find(|(k, _)| k.split("::").last() == Some(short_type))
911                .map(|(_, id)| id)
912        })?;
913
914        let type_path = self.link_registry.get_path(*type_id)?;
915        let relative = LinkRegistry::compute_relative_path(self.current_file, type_path);
916        let display = format!("{type_name}::{method_name}");
917
918        // Link to the type page without a method anchor (methods don't have headings)
919        Some(format!("[`{display}`]({relative})"))
920    }
921
922    /// Try to resolve link text to a markdown link.
923    fn resolve_link(&self, link_text: &str, item_links: &HashMap<String, Id>) -> String {
924        // Strategy 1: Exact match
925        if let Some(id) = item_links.get(link_text)
926            && let Some(md_link) = self.create_link_for_id(*id, link_text)
927        {
928            return md_link;
929        }
930
931        // Strategy 2: Short name match in item_links
932        let short_name = link_text.split("::").last().unwrap_or(link_text);
933
934        for (key, id) in item_links {
935            if key.split("::").last() == Some(short_name)
936                && let Some(md_link) = self.create_link_for_id(*id, short_name)
937            {
938                return md_link;
939            }
940        }
941
942        // Strategy 3: Use path name index
943        if let Some(ids) = self.path_name_index.get(short_name) {
944            for id in ids {
945                if let Some(md_link) = self.create_link_for_id(*id, short_name) {
946                    return md_link;
947                }
948            }
949        }
950
951        // Fallback: return original
952        format!("[`{link_text}`]")
953    }
954
955    /// Create a markdown link for an ID.
956    fn create_link_for_id(&self, id: Id, display_name: &str) -> Option<String> {
957        // Try local link
958        if let Some(link) = self.link_registry.create_link(id, self.current_file) {
959            return Some(link);
960        }
961
962        if let Some(path) = self.link_registry.get_path(id) {
963            let relative = LinkRegistry::compute_relative_path(self.current_file, path);
964            let clean_name = display_name.split("::").last().unwrap_or(display_name);
965            return Some(format!("[`{clean_name}`]({relative})"));
966        }
967
968        // Try docs.rs for external crates
969        if let Some(path_info) = self.krate.paths.get(&id)
970            && path_info.crate_id != 0
971        {
972            return Self::create_docs_rs_link(path_info, display_name);
973        }
974
975        None
976    }
977
978    /// Create a docs.rs link for an external crate item.
979    fn create_docs_rs_link(
980        path_info: &rustdoc_types::ItemSummary,
981        display_name: &str,
982    ) -> Option<String> {
983        let url = Self::get_docs_rs_url(path_info)?;
984        let clean_name = display_name.split("::").last().unwrap_or(display_name);
985        Some(format!("[`{clean_name}`]({url})"))
986    }
987}
988
989// =============================================================================
990// Helper Functions
991// =============================================================================
992
993/// Replace regex matches using a closure.
994fn replace_with_regex<F>(text: &str, re: &Regex, replacer: F) -> String
995where
996    F: Fn(&regex::Captures<'_>) -> String,
997{
998    let mut result = String::with_capacity(text.len());
999    let mut last_end = 0;
1000
1001    for caps in re.captures_iter(text) {
1002        let m = caps.get(0).unwrap();
1003        result.push_str(&text[last_end..m.start()]);
1004        result.push_str(&replacer(&caps));
1005        last_end = m.end();
1006    }
1007
1008    result.push_str(&text[last_end..]);
1009    result
1010}
1011
1012/// Replace regex matches with access to the text after the match.
1013fn replace_with_regex_checked<F>(text: &str, re: &Regex, replacer: F) -> String
1014where
1015    F: Fn(&regex::Captures<'_>, &str) -> String,
1016{
1017    let mut result = String::with_capacity(text.len());
1018    let mut last_end = 0;
1019
1020    for caps in re.captures_iter(text) {
1021        let m = caps.get(0).unwrap();
1022        result.push_str(&text[last_end..m.start()]);
1023        let rest = &text[m.end()..];
1024        result.push_str(&replacer(&caps, rest));
1025        last_end = m.end();
1026    }
1027
1028    result.push_str(&text[last_end..]);
1029    result
1030}
1031
1032// =============================================================================
1033// Tests
1034// =============================================================================
1035
#[cfg(test)]
mod tests {
    use super::*;

    /// HTML-style rustdoc links: type pages become anchors, method fragments vanish.
    #[test]
    fn test_convert_html_links() {
        // A type-level link is rewritten to an anchor.
        let type_level = convert_html_links("See (enum.Foo.html) for details");
        assert_eq!(type_level, "See (#foo) for details");

        // A method-level link is dropped entirely (methods have no anchors).
        let method_level = convert_html_links("Call (struct.Bar.html#method.new)");
        assert_eq!(method_level, "Call ");
    }

    /// A leading heading that repeats the crate name is removed; other headings stay.
    #[test]
    fn test_strip_duplicate_title() {
        // (docs, crate name, expected output)
        let cases = [
            // Heading equals the crate name
            (
                "# my_crate\n\nThis is the description.",
                "my_crate",
                "This is the description.",
            ),
            // Different title - keep it
            (
                "# Introduction\n\nThis is the description.",
                "my_crate",
                "# Introduction\n\nThis is the description.",
            ),
            // Backticks around title (e.g., # `clap_builder`)
            (
                "# `clap_builder`\n\nBuilder implementation.",
                "clap_builder",
                "Builder implementation.",
            ),
            // Spaced title (e.g., # Serde JSON -> serde_json)
            (
                "# Serde JSON\n\nJSON serialization.",
                "serde_json",
                "JSON serialization.",
            ),
            // Hyphenated name
            ("# my-crate\n\nDescription.", "my_crate", "Description."),
        ];

        for (docs, crate_name, expected) in cases {
            assert_eq!(strip_duplicate_title(docs, crate_name), expected);
        }
    }

    /// Reference definitions are removed while the prose that uses them survives.
    #[test]
    fn test_strip_reference_definitions() {
        // (docs, text that must remain, definition that must be gone)
        let cases = [
            // Backtick-style reference definitions
            (
                "See [`Foo`] for details.\n\n[`Foo`]: crate::Foo",
                "See [`Foo`]",
                "[`Foo`]: crate::Foo",
            ),
            // Plain reference definitions (no backticks)
            (
                "Use [value] here.\n\n[value]: crate::value::Value",
                "Use [value]",
                "[value]: crate::value::Value",
            ),
            // Reference definitions with anchors
            (
                "See [from_str](#from-str) docs.\n\n[from_str](#from-str): crate::de::from_str",
                "See [from_str](#from-str)",
                "[from_str](#from-str): crate::de::from_str",
            ),
        ];

        for (docs, kept, removed) in cases {
            let stripped = strip_reference_definitions(docs);
            assert!(stripped.contains(kept));
            assert!(!stripped.contains(removed));
        }

        // Multiple reference definitions
        let several = strip_reference_definitions(
            "Content.\n\n[a]: path::a\n[b]: path::b\n[`c`]: path::c",
        );
        assert_eq!(several.trim(), "Content.");
    }

    /// Path references collapse to their display text.
    #[test]
    fn test_convert_path_reference_links() {
        // Path references become inline code (can't create valid anchors without context)
        let converted =
            convert_path_reference_links("[`Tracker`][crate::style::Tracker] is useful");
        assert_eq!(converted, "`Tracker` is useful");
    }

    /// The `# ` hidden-line prefix is stripped inside code blocks.
    #[test]
    fn test_unhide_code_lines_strips_hidden_prefix() {
        let input = "```\n# #[cfg(feature = \"test\")]\n# {\nuse foo::bar;\n# }\n```";
        let expected = "```rust\n#[cfg(feature = \"test\")]\n{\nuse foo::bar;\n}\n```";
        assert_eq!(unhide_code_lines(input), expected);
    }

    /// A fence without a language tag is labelled `rust`.
    #[test]
    fn test_unhide_code_lines_adds_rust_to_bare_fence() {
        let unhidden = unhide_code_lines("```\nlet x = 1;\n```");
        assert_eq!(unhidden, "```rust\nlet x = 1;\n```");
    }

    /// A fence that already names a language is left untouched.
    #[test]
    fn test_unhide_code_lines_preserves_existing_language() {
        let python_block = "```python\nprint('hello')\n```";
        assert_eq!(unhide_code_lines(python_block), python_block);
    }

    /// Tilde fences are handled like backtick fences.
    #[test]
    fn test_unhide_code_lines_handles_tilde_fence() {
        let unhidden = unhide_code_lines("~~~\ncode\n~~~");
        assert_eq!(unhidden, "~~~rust\ncode\n~~~");
    }

    /// A lone `#` inside a code block is a hidden empty line.
    #[test]
    fn test_unhide_code_lines_lone_hash() {
        // A lone # becomes an empty line
        let unhidden = unhide_code_lines("```\n#\nlet x = 1;\n```");
        assert_eq!(unhidden, "```rust\n\nlet x = 1;\n```");
    }
}