cargo_docs_md/generator/doc_links.rs
1//! Intra-doc link processing for documentation generation.
2//!
3//! This module provides [`DocLinkProcessor`] which transforms rustdoc
4//! intra-doc link syntax into proper markdown links.
5//!
6//! # Processing Pipeline
7//! The processor applies transformations in this order:
8//! 1. Strip markdown reference definitions
9//! 2. Unhide rustdoc hidden lines in code blocks
10//! 3. Process reference-style links `[text][`ref`]`
11//! 4. Process path reference links `[text][crate::path]`
12//! 5. Process method links `[Type::method]`
13//! 6. Process backtick links `[`Name`]`
14//! 7. Process plain links `[name]`
15//! 8. Convert HTML-style rustdoc links
16//! 9. Clean up blank lines
17//!
18//! Links inside code blocks are protected from transformation.
19
20use std::collections::HashMap;
21use std::sync::LazyLock;
22
23use regex::Regex;
24use rustdoc_types::{Crate, Id, ItemKind};
25
26use crate::linker::{item_has_anchor, LinkRegistry};
27
28// =============================================================================
29// Static Regex Patterns (compiled once, reused everywhere)
30// =============================================================================
31
/// Regex for HTML-style rustdoc links.
///
/// Matches the parenthesised target of a markdown link that points at a
/// rustdoc-generated HTML page: `(struct.Name.html)` or
/// `(enum.Name.html#method.foo)`. The surrounding parentheses are part of
/// the match, so a replacement must re-emit them if the link is kept.
///
/// # Capture Groups
/// - Group 1: Item kind (`struct`, `enum`, `trait`, `fn`, `type`, `macro`,
///   `constant` or `mod`)
/// - Group 2: Item name (a Rust identifier)
/// - Group 3: Optional fragment kind (lowercase letters, e.g. `method`)
/// - Group 4: Optional fragment item name (e.g. `foo`)
///
/// Groups 3 and 4 are only present when the target has a `#kind.name`
/// fragment.
static HTML_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(concat!(
        r"\((struct|enum|trait|fn|type|macro|constant|mod)\.",
        r"([A-Za-z_][A-Za-z0-9_]*)\.html",
        r"(?:#([a-z]+)\.([A-Za-z_][A-Za-z0-9_]*))?\)",
    ))
    .unwrap()
});
42
/// Regex for path-style reference links.
///
/// Matches: `[display][crate::path::Item]`
///
/// Used for rustdoc's reference-style intra-doc links where the display text
/// differs from the path reference.
///
/// # Capture Groups
/// - Group 1: Display text (one or more characters, anything except `]`)
/// - Group 2: Rust path with `::` separators (e.g., `crate::module::Item`)
///
/// # Pattern Breakdown
/// ```text
/// \[([^\]]+)\]                      # [display text] - capture non-] chars
/// \[                                # Opening bracket for reference
/// ([a-zA-Z_][a-zA-Z0-9_]*           # First path segment (ASCII identifier)
///   (?:::[a-zA-Z_][a-zA-Z0-9_]*)+   # One or more ::segment pairs (requires at least one ::)
/// )\]                               # Close capture and bracket
/// ```
///
/// # Note
/// The pattern requires at least one `::` separator, so it won't match
/// single identifiers like `[text][Name]`. Path segments are limited to
/// ASCII identifier characters.
static PATH_REF_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\[([^\]]+)\]\[([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z_][a-zA-Z0-9_]*)+)\]").unwrap()
});
69
/// Regex for backtick code links.
///
/// Matches: `` [`Name`] `` (the most common intra-doc link format)
///
/// This is the primary pattern for rustdoc intra-doc links. The backticks
/// indicate the link should be rendered as inline code.
///
/// # Capture Groups
/// - Group 1: The link text inside backticks (e.g., `Name`, `path::Item`)
///
/// # Pattern Breakdown
/// ```text
/// \[`      # Literal "[`" - opening bracket and backtick
/// ([^`]+)  # Capture: one or more non-backtick characters
/// `\]      # Literal "`]" - closing backtick and bracket
/// ```
///
/// # Processing Note
/// The regex itself does not look at what follows the match. The caller
/// checks whether the match is followed by `(` to avoid double-processing
/// already-converted markdown links like `` [`Name`](url) ``.
static BACKTICK_LINK_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[`([^`]+)`\]").unwrap());
91
/// Regex for reference-style links with backticks.
///
/// Matches: `` [display text][`ref`] ``
///
/// This pattern handles rustdoc reference-style links where custom display
/// text links to a backtick-wrapped reference.
///
/// # Capture Groups
/// - Group 1: Display text (what the user sees)
/// - Group 2: Reference text inside backticks (the actual link target)
///
/// # Pattern Breakdown
/// ```text
/// \[([^\]]+)\]  # [display text] - capture anything except ]
/// \[`           # Opening "[`" for the reference
/// ([^`]+)       # Capture: reference name (non-backtick chars)
/// `\]           # Closing "`]"
/// ```
///
/// # Example
/// `` [custom text][`HashMap`] `` renders as "custom text" linking to `HashMap`.
static REFERENCE_LINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[`([^`]+)`\]").unwrap());
115
/// Regex for markdown reference definitions.
///
/// Matches: `` [`Name`]: path::to::item `` (and the plain / anchored
/// variants listed below) when the definition occupies a whole line.
///
/// These are markdown reference definition lines that rustdoc uses internally.
/// We strip these from output since intra-doc links are resolved directly.
///
/// # Pattern Breakdown
/// ```text
/// (?m)            # Multi-line mode: ^ and $ match line boundaries
/// ^               # Start of line
/// \s*             # Optional leading whitespace
/// \[[^\]]+\]      # Bracketed label: any non-`]` chars (backticks optional)
/// (?:\([^)]*\))?  # Optional parenthesised part, e.g. `(#anchor)`
/// :               # Literal colon separator
/// \s*             # Optional whitespace after colon
/// \S+             # The target path (non-whitespace chars)
/// \s*             # Optional trailing whitespace
/// $               # End of line
/// ```
///
/// # Note
/// This pattern doesn't capture groups because it's used with `replace_all`
/// to remove entire lines.
///
/// The target must be a single whitespace-free token that runs to the end
/// of the line, so definitions carrying a title are not matched here.
/// `\s` also matches line breaks, so the whitespace runs in this pattern
/// may extend across adjacent blank lines.
///
/// Matches various reference definition formats:
/// - `[`Foo`]: crate::Foo` (backtick style)
/// - `[name]: crate::path` (plain style)
/// - `[name](#anchor): crate::path` (with anchor)
static REFERENCE_DEF_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^\s*\[[^\]]+\](?:\([^)]*\))?:\s*\S+\s*$").unwrap());
146
/// Regex for plain identifier links.
///
/// Matches: `[name]` where name is an ASCII Rust identifier
///
/// This handles the simplest intra-doc link format without backticks.
/// Used less frequently than backtick links but still valid rustdoc syntax.
///
/// # Capture Groups
/// - Group 1: The identifier name
///
/// # Pattern Breakdown
/// ```text
/// \[               # Opening bracket
/// ([a-zA-Z_]       # Capture start: letter or underscore (Rust identifier rules)
///  [a-zA-Z0-9_]*)  # Followed by alphanumeric or underscore
/// \]               # Closing bracket
/// ```
///
/// # Processing Note
/// The regex matches any bracketed identifier, including ordinary prose
/// such as `[sic]`. The caller therefore skips matches followed by `(` or
/// `[` (existing markdown links or reference-style links) and only
/// rewrites identifiers that exist in `item_links`.
static PLAIN_LINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([a-zA-Z_][a-zA-Z0-9_]*)\]").unwrap());
171
/// Regex for method/associated item links.
///
/// Matches: `` [`Type::method`] `` or `` [`mod::Type::CONST`] ``
///
/// Handles links to methods, associated functions, constants, and other
/// items accessed via `::` path notation. This includes both type-level
/// paths (`Type::method`) and module-level paths (`mod::Type::CONST`).
///
/// # Capture Groups
/// - Group 1: The full path including `::` separators
///
/// # Pattern Breakdown
/// ```text
/// \[`                               # Opening "[`"
/// (                                 # Start capture group
///   [A-Za-z_][A-Za-z0-9_]*          # First segment (Rust identifier)
///   (?:::[A-Za-z_][A-Za-z0-9_]*)+   # One or more ::segment pairs
/// )                                 # End capture group
/// `\]                               # Closing "`]"
/// ```
///
/// # Examples Matched
/// - `` [`HashMap::new`] `` - associated function
/// - `` [`Option::Some`] `` - enum variant
/// - `` [`Iterator::next`] `` - trait method
/// - `` [`std::vec::Vec`] `` - fully qualified path
///
/// # Processing Note
/// The caller splits the captured path at the last `::`. The part before
/// it is used to find the target type's page; the generated link points at
/// that page without a per-method anchor, because methods do not get their
/// own headings in the generated markdown. Paths whose prefix cannot be
/// resolved are left untouched for the later backtick-link pass.
static METHOD_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\[`([A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)+)`\]").unwrap()
});
205
206// =============================================================================
207// Standalone Functions
208// =============================================================================
209
210/// Convert HTML-style rustdoc links to markdown anchors.
211///
212/// Transforms links like:
213/// - `(enum.NumberPrefix.html)` -> `(#numberprefix)`
214/// - `(struct.Foo.html#method.bar)` -> removes the link (methods don't have anchors)
215///
216/// This is useful for multi-crate documentation where the full processor
217/// context may not be available.
218#[must_use]
219pub fn convert_html_links(docs: &str) -> String {
220 replace_with_regex(docs, &HTML_LINK_RE, |caps| {
221 let item_name = &caps[2];
222
223 // If there's a method/variant anchor part, remove the link entirely
224 // since methods don't have individual headings
225 if caps.get(4).is_some() {
226 // Return empty to remove the (link) part, keeping just the display text
227 String::new()
228 } else {
229 // Type-level anchor should exist
230 format!("(#{})", item_name.to_lowercase())
231 }
232 })
233}
234
/// Strip duplicate title from documentation.
///
/// Some crate/module docs start with `# title` which duplicates the generated
/// `# Crate 'name'` or `# Module 'name'` heading.
///
/// The title and the item name are compared after normalization: backticks
/// are removed from the title, spaces and hyphens become underscores, and
/// both sides are lowercased.
///
/// # Arguments
///
/// * `docs` - The documentation string to process
/// * `item_name` - The name of the crate or module being documented
///
/// # Returns
///
/// The docs with the leading title line and the line breaks that follow it
/// removed if the title matches the item name, otherwise the original docs
/// unchanged. Both `\n` and `\r\n` line endings are handled.
#[must_use]
pub fn strip_duplicate_title<'a>(docs: &'a str, item_name: &str) -> &'a str {
    let Some(first_line) = docs.lines().next() else {
        return docs;
    };

    let Some(title) = first_line.strip_prefix("# ") else {
        return docs;
    };

    // Normalize the title:
    // - Remove backticks (e.g., `clap_builder` -> clap_builder)
    // - Replace spaces with underscores (e.g., "Serde JSON" -> "serde_json")
    // - Replace hyphens with underscores (e.g., "my-crate" -> "my_crate")
    // - Lowercase for comparison
    let normalized_title = title
        .trim()
        .replace('`', "")
        .replace(['-', ' '], "_")
        .to_lowercase();

    let normalized_name = item_name.replace('-', "_").to_lowercase();

    if normalized_title != normalized_name {
        return docs;
    }

    // `lines()` strips a trailing "\r\n" from `first_line`, so the remainder
    // may begin with '\r'. Trim both characters so CRLF documents do not keep
    // a stray line break in front of the body.
    docs[first_line.len()..].trim_start_matches(['\r', '\n'])
}
279
280/// Strip markdown reference definition lines.
281///
282/// Removes lines like `[`Name`]: path::to::item` which are no longer needed
283/// after intra-doc links are processed.
284pub fn strip_reference_definitions(docs: &str) -> String {
285 REFERENCE_DEF_RE.replace_all(docs, "").to_string()
286}
287
/// Unhide rustdoc hidden lines in code blocks and add language identifiers.
///
/// This function performs two transformations on fenced code blocks:
/// 1. Inside Rust code blocks, lines starting with `# ` (or consisting of a
///    lone `#`) are hidden by rustdoc but still compiled. The prefix is
///    removed so the full example is shown.
/// 2. Bare code fences (` ``` ` or `~~~`) are converted to ` ```rust ` /
///    `~~~rust` since untagged doc examples are Rust code.
///
/// Blocks tagged with another language (for example `toml`, `sh` or `text`)
/// are copied verbatim: there a leading `# ` is an ordinary comment or
/// prompt, not a rustdoc hidden-line marker.
///
/// Lines are re-joined with `\n`; a trailing newline is kept only if the
/// input ended with one.
#[must_use]
pub fn unhide_code_lines(docs: &str) -> String {
    let mut result = String::with_capacity(docs.len());
    // `Some((opening fence, block is Rust))` while inside a fenced block.
    let mut open_block: Option<(&str, bool)> = None;

    for line in docs.lines() {
        let trimmed = line.trim_start();
        let leading_ws = &line[..line.len() - trimmed.len()];

        if let Some(marker) = detect_fence(trimmed) {
            match open_block {
                // Same fence style as the opener: this closes the block.
                Some((open, _)) if trimmed.starts_with(open) => {
                    open_block = None;
                    result.push_str(line);
                }
                // Nested fence (different style) - just pass through.
                Some(_) => result.push_str(line),
                // Opening fence - decide the language and tag bare fences.
                None => {
                    let fence_char = if marker == "```" { '`' } else { '~' };
                    let info = trimmed[marker.len()..].trim_start_matches(fence_char);
                    open_block = Some((marker, is_rust_fence_info(info)));

                    if trimmed == "```" || trimmed == "~~~" {
                        result.push_str(leading_ws);
                        result.push_str(trimmed);
                        result.push_str("rust");
                    } else {
                        result.push_str(line);
                    }
                }
            }
            result.push('\n');
            continue;
        }

        match open_block {
            Some((_, true)) => {
                if trimmed == "#" {
                    // Just "#" becomes an empty line (newline added below).
                } else if let Some(rest) = trimmed.strip_prefix("# ") {
                    // "# code" becomes "code", keeping the indentation.
                    result.push_str(leading_ws);
                    result.push_str(rest);
                } else {
                    result.push_str(line);
                }
            }
            // Non-Rust block or plain prose: copy unchanged.
            _ => result.push_str(line),
        }
        result.push('\n');
    }

    // Remove trailing newline if original didn't have one
    if !docs.ends_with('\n') && result.ends_with('\n') {
        result.pop();
    }

    result
}

/// Decide whether a fence info string denotes a Rust code block.
///
/// This approximates rustdoc's rule: an empty info string is Rust, an
/// explicit `rust` token is Rust, and an info string made up only of known
/// doctest attributes (`no_run`, `ignore`, `should_panic`, `compile_fail`,
/// `edition20xx`, error codes, ...) is Rust. Any other token (a language
/// name such as `toml` or `text`) marks the block as non-Rust.
fn is_rust_fence_info(info: &str) -> bool {
    let mut only_known_attributes = true;

    for token in info
        .split(|c: char| c == ',' || c.is_whitespace())
        .filter(|token| !token.is_empty())
    {
        match token {
            "rust" => return true,
            "should_panic" | "no_run" | "ignore" | "compile_fail" | "test_harness"
            | "standalone" | "standalone_crate" => {}
            t if t.starts_with("ignore-") || t.starts_with("edition") => {}
            // Compiler error codes such as `E0277` accompany `compile_fail`.
            t if t.len() == 5
                && t.starts_with('E')
                && t[1..].bytes().all(|b| b.is_ascii_digit()) => {}
            _ => only_known_attributes = false,
        }
    }

    only_known_attributes
}

/// Detect a code fence and return the fence string.
///
/// `trimmed` is expected to have its leading whitespace removed already.
/// Returns the three-character marker (backticks or tildes) the line starts
/// with, or `None` when the line is not a fence.
fn detect_fence(trimmed: &str) -> Option<&'static str> {
    if trimmed.starts_with("```") {
        Some("```")
    } else if trimmed.starts_with("~~~") {
        Some("~~~")
    } else {
        None
    }
}
369
370/// Convert path-style reference links to inline code.
371///
372/// Transforms: `[``ProgressTracker``][crate::style::ProgressTracker]`
373/// Into: `` `ProgressTracker` ``
374///
375/// Without full link resolution context, we can't create valid anchors,
376/// so we preserve the display text as inline code.
377#[must_use]
378pub fn convert_path_reference_links(docs: &str) -> String {
379 replace_with_regex(docs, &PATH_REF_LINK_RE, |caps| {
380 let display_text = &caps[1];
381 // Don't double-wrap in backticks
382 if display_text.starts_with('`') && display_text.ends_with('`') {
383 display_text.to_string()
384 } else {
385 format!("`{display_text}`")
386 }
387 })
388}
389
390// =============================================================================
391// DocLinkProcessor
392// =============================================================================
393
/// Processes doc comments to resolve intra-doc links to markdown links.
///
/// Rustdoc JSON includes a `links` field on each Item that maps intra-doc
/// link text to item IDs. This processor uses that map along with the
/// `LinkRegistry` to convert these to relative markdown links.
///
/// # Supported Patterns
///
/// - `` [`Name`] `` - Backtick code links (most common)
/// - `` [`path::to::Item`] `` - Qualified path links
/// - `` [`Type::method`] `` - Method/associated item links
/// - `[name]` - Plain identifier links
/// - `[text][`ref`]` - Reference-style links
/// - `[text][crate::path]` - Path reference links
///
/// # External Crate Links
///
/// Items from external crates (those whose `crate_id` is non-zero) are
/// linked to docs.rs when possible.
///
/// # Code Block Protection
///
/// Links inside fenced code blocks are not processed.
pub struct DocLinkProcessor<'a> {
    /// The crate being documented (for looking up items).
    krate: &'a Crate,

    /// Registry mapping IDs to file paths.
    link_registry: &'a LinkRegistry,

    /// The current file path (for relative link calculation).
    current_file: &'a str,

    /// Index mapping item names to their IDs for fast lookup.
    /// Built from `krate.paths` at construction time, keyed by the last
    /// path segment. Each ID list is sorted by full path so that lookups
    /// of ambiguous names are deterministic.
    path_name_index: HashMap<&'a str, Vec<Id>>,
}
430
431impl<'a> DocLinkProcessor<'a> {
432 /// Create a new processor for the given context.
433 #[must_use]
434 pub fn new(krate: &'a Crate, link_registry: &'a LinkRegistry, current_file: &'a str) -> Self {
435 // Build path name index for O(1) lookups
436 let mut path_name_index: HashMap<&'a str, Vec<Id>> = HashMap::new();
437 for (id, path_info) in &krate.paths {
438 if let Some(name) = path_info.path.last() {
439 path_name_index.entry(name.as_str()).or_default().push(*id);
440 }
441 }
442
443 // Sort each Vec by full path for deterministic resolution order
444 for ids in path_name_index.values_mut() {
445 ids.sort_by(|a, b| {
446 let path_a = krate.paths.get(a).map(|p| p.path.join("::"));
447 let path_b = krate.paths.get(b).map(|p| p.path.join("::"));
448 path_a.cmp(&path_b)
449 });
450 }
451
452 Self {
453 krate,
454 link_registry,
455 current_file,
456 path_name_index,
457 }
458 }
459
460 /// Process a doc string and resolve all intra-doc links.
461 ///
462 /// Uses the item's `links` map to resolve link text to IDs,
463 /// then uses `LinkRegistry` to convert IDs to relative paths.
464 #[must_use]
465 pub fn process(&self, docs: &str, item_links: &HashMap<String, Id>) -> String {
466 // Step 1: Strip reference definitions first
467 let stripped = strip_reference_definitions(docs);
468
469 // Step 2: Unhide rustdoc hidden lines in code blocks and add `rust` to bare fences
470 let unhidden = unhide_code_lines(&stripped);
471
472 // Step 3: Process all link types (with code block protection)
473 let processed = self.process_links_protected(&unhidden, item_links);
474
475 // Step 4: Clean up blank lines
476 Self::clean_blank_lines(&processed)
477 }
478
479 /// Process links while protecting code block contents.
480 fn process_links_protected(&self, docs: &str, item_links: &HashMap<String, Id>) -> String {
481 let mut result = String::with_capacity(docs.len());
482 let mut current_pos = 0;
483 let _bytes = docs.as_bytes();
484
485 // Track code block state
486 let mut in_code_block = false;
487 let mut fence: Option<&str> = None;
488
489 for line in docs.lines() {
490 let line_end = current_pos + line.len();
491
492 // Check for code fence
493 let trimmed = line.trim_start();
494 if let Some(f) = detect_fence(trimmed) {
495 if in_code_block {
496 // Check if this closes the current block
497 if let Some(open_fence) = fence
498 && trimmed.starts_with(open_fence)
499 {
500 in_code_block = false;
501 fence = None;
502 }
503 } else {
504 in_code_block = true;
505 fence = Some(f);
506 }
507
508 result.push_str(line);
509 } else if in_code_block {
510 // Inside code block - don't process
511 result.push_str(line);
512 } else {
513 // Outside code block - process links
514 let processed = self.process_line(line, item_links);
515 result.push_str(&processed);
516 }
517
518 // Add newline if not at end
519 current_pos = line_end;
520 if current_pos < docs.len() {
521 result.push('\n');
522 current_pos += 1; // Skip the newline character
523 }
524 }
525
526 result
527 }
528
529 /// Process a single line for all link types.
530 fn process_line(&self, line: &str, item_links: &HashMap<String, Id>) -> String {
531 // Skip lines that look like reference definitions (backup check)
532 if line.trim_start().starts_with("[`") && line.contains("]:") {
533 return String::new();
534 }
535
536 // Process in order of specificity (most specific patterns first)
537 let s = self.process_reference_links(line, item_links);
538 let s = self.process_path_reference_links(&s, item_links);
539 let s = self.process_method_links(&s, item_links);
540 let s = self.process_backtick_links(&s, item_links);
541 let s = self.process_plain_links(&s, item_links);
542
543 self.process_html_links_with_context(&s, item_links)
544 }
545
546 /// Process reference-style links `[display text][`Span`]`.
547 fn process_reference_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
548 replace_with_regex(text, &REFERENCE_LINK_RE, |caps| {
549 let display_text = &caps[1];
550 let ref_key = &caps[2];
551
552 self.resolve_to_url(ref_key, item_links).map_or_else(
553 || caps[0].to_string(),
554 |url| format!("[{display_text}]({url})"),
555 )
556 })
557 }
558
559 /// Process path reference links `[text][crate::path::Item]`.
560 fn process_path_reference_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
561 replace_with_regex(text, &PATH_REF_LINK_RE, |caps| {
562 let display_text = &caps[1];
563 let rust_path = &caps[2];
564
565 self.resolve_to_url(rust_path, item_links).map_or_else(
566 // Can't resolve - keep as inline code without broken anchor
567 || {
568 // Don't double-wrap in backticks
569 if display_text.starts_with('`') && display_text.ends_with('`') {
570 display_text.to_string()
571 } else {
572 format!("`{display_text}`")
573 }
574 },
575 |url| format!("[{display_text}]({url})"),
576 )
577 })
578 }
579
580 /// Process method links `[``Type::method``]`.
581 fn process_method_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
582 replace_with_regex_checked(text, &METHOD_LINK_RE, |caps, rest| {
583 // Skip if already a markdown link
584 if rest.starts_with('(') {
585 return caps[0].to_string();
586 }
587
588 let full_path = &caps[1];
589 if let Some(last_sep) = full_path.rfind("::") {
590 let type_part = &full_path[..last_sep];
591 let method_part = &full_path[last_sep + 2..];
592
593 if let Some(link) = self.resolve_method_link(type_part, method_part, item_links) {
594 return link;
595 }
596 }
597 caps[0].to_string()
598 })
599 }
600
601 /// Process backtick links `[`Name`]`.
602 fn process_backtick_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
603 replace_with_regex_checked(text, &BACKTICK_LINK_RE, |caps, rest| {
604 // Skip if already a markdown link
605 if rest.starts_with('(') {
606 return caps[0].to_string();
607 }
608
609 let link_text = &caps[1];
610 self.resolve_link(link_text, item_links)
611 })
612 }
613
614 /// Process plain links `[name]`.
615 fn process_plain_links(&self, text: &str, item_links: &HashMap<String, Id>) -> String {
616 replace_with_regex_checked(text, &PLAIN_LINK_RE, |caps, rest| {
617 // Skip if already a markdown link
618 if matches!(rest.chars().next(), Some('(' | '[')) {
619 return caps[0].to_string();
620 }
621
622 let link_text = &caps[1];
623
624 // Only process if it's in item_links (avoid false positives)
625 if let Some(id) = item_links.get(link_text)
626 && let Some(md_link) = self.create_link_for_id(*id, link_text)
627 {
628 return md_link;
629 }
630 caps[0].to_string()
631 })
632 }
633
634 /// Process HTML-style rustdoc links with context awareness.
635 ///
636 /// Instead of blindly converting all HTML links to local anchors,
637 /// this method checks if the item actually exists on the current page.
638 /// If not, it tries to resolve to docs.rs or removes the broken link.
639 fn process_html_links_with_context(
640 &self,
641 text: &str,
642 item_links: &HashMap<String, Id>,
643 ) -> String {
644 replace_with_regex(text, &HTML_LINK_RE, |caps| {
645 let item_kind = &caps[1]; // struct, enum, trait, etc.
646 let item_name = &caps[2];
647
648 // If there's a method/variant anchor part, remove the link entirely
649 // since methods don't have individual headings
650 if caps.get(4).is_some() {
651 return String::new();
652 }
653
654 // Try to find this item in our link resolution
655 if let Some(url) = self.resolve_html_link_to_url(item_name, item_kind, item_links) {
656 return format!("({url})");
657 }
658
659 // Fallback: remove the link part entirely (keep just the display text)
660 // This is better than creating a broken #anchor
661 String::new()
662 })
663 }
664
665 /// Try to resolve an HTML-style link to a proper URL.
666 ///
667 /// Returns a URL if the item can be resolved (either locally or to docs.rs),
668 /// or None if the item cannot be found.
669 fn resolve_html_link_to_url(
670 &self,
671 item_name: &str,
672 item_kind: &str,
673 item_links: &HashMap<String, Id>,
674 ) -> Option<String> {
675 // Strategy 1: Check if item is in item_links
676 if let Some(id) = item_links.get(item_name) {
677 // Check if it's on the current page
678 if let Some(path) = self.link_registry.get_path(*id) {
679 if path == self.current_file {
680 // Only create anchor if item has a heading
681 if let Some(path_info) = self.krate.paths.get(id)
682 && item_has_anchor(path_info.kind)
683 {
684 return Some(format!("#{}", item_name.to_lowercase()));
685 }
686 // Item on page but no anchor - link to page without anchor
687 return Some(String::new());
688 }
689 // Item is in another file
690 let relative = LinkRegistry::compute_relative_path(self.current_file, path);
691 return Some(relative);
692 }
693
694 // Try docs.rs for external crates
695 if let Some(path_info) = self.krate.paths.get(id)
696 && path_info.crate_id != 0
697 {
698 return Self::get_docs_rs_url(path_info);
699 }
700 }
701
702 // Strategy 2: Search path_name_index for the item name
703 if let Some(ids) = self.path_name_index.get(item_name) {
704 for id in ids {
705 if let Some(path) = self.link_registry.get_path(*id) {
706 if path == self.current_file {
707 // Only create anchor if item has a heading
708 if let Some(path_info) = self.krate.paths.get(id)
709 && item_has_anchor(path_info.kind)
710 {
711 return Some(format!("#{}", item_name.to_lowercase()));
712 }
713 // Item on page but no anchor - link to page without anchor
714 return Some(String::new());
715 }
716 let relative = LinkRegistry::compute_relative_path(self.current_file, path);
717 return Some(relative);
718 }
719
720 // Try docs.rs
721 if let Some(path_info) = self.krate.paths.get(id)
722 && path_info.crate_id != 0
723 {
724 return Self::get_docs_rs_url(path_info);
725 }
726 }
727 }
728
729 // Strategy 3: Search krate.paths for external items by name
730 // Collect all matches and pick the shortest path (most specific) for determinism
731 let mut matches: Vec<_> = self
732 .krate
733 .paths
734 .values()
735 .filter(|path_info| {
736 path_info.crate_id != 0
737 && path_info.path.last().is_some_and(|name| name == item_name)
738 && Self::kind_matches(item_kind, path_info.kind)
739 })
740 .collect();
741
742 // Sort by full path for deterministic selection
743 matches.sort_by(|a, b| a.path.join("::").cmp(&b.path.join("::")));
744
745 matches.first().and_then(|path_info| Self::get_docs_rs_url(path_info))
746 }
747
748 /// Check if the HTML link kind matches the rustdoc item kind.
749 fn kind_matches(html_kind: &str, item_kind: ItemKind) -> bool {
750 match html_kind {
751 "struct" => item_kind == ItemKind::Struct,
752 "enum" => item_kind == ItemKind::Enum,
753 "trait" => item_kind == ItemKind::Trait,
754 "fn" => item_kind == ItemKind::Function,
755 "type" => item_kind == ItemKind::TypeAlias,
756 "macro" => item_kind == ItemKind::Macro,
757 "constant" => item_kind == ItemKind::Constant,
758 "mod" => item_kind == ItemKind::Module,
759 _ => false,
760 }
761 }
762
/// Collapse runs of blank lines into a single blank line.
///
/// A line counts as blank when it is empty or whitespace-only. The first
/// blank line of a run is kept as written; the rest are dropped. Empty
/// lines at the very start produce no output, and trailing whitespace of
/// the final result is trimmed.
fn clean_blank_lines(docs: &str) -> String {
    let mut after_blank = false;

    // Keep every line except blank lines that directly follow a blank line.
    let kept: Vec<&str> = docs
        .lines()
        .filter(|line| {
            let blank = line.trim().is_empty();
            let redundant = blank && after_blank;
            after_blank = blank;
            !redundant
        })
        .collect();

    let joined = kept.join("\n");

    // Only a truly empty first line makes the joined text start with '\n';
    // such a line contributes nothing to the output, so drop that separator.
    let body = joined.strip_prefix('\n').unwrap_or(joined.as_str());

    body.trim_end().to_string()
}
782
783 // =========================================================================
784 // Resolution Methods
785 // =========================================================================
786
787 /// Resolve a link reference to a URL.
788 fn resolve_to_url(&self, link_text: &str, item_links: &HashMap<String, Id>) -> Option<String> {
789 // Strategy 1: Exact match in item_links
790 if let Some(id) = item_links.get(link_text)
791 && let Some(url) = self.get_url_for_id(*id)
792 {
793 return Some(url);
794 }
795
796 // Strategy 2: Short name match in item_links
797 let short_name = link_text.split("::").last().unwrap_or(link_text);
798
799 for (key, id) in item_links {
800 if key.split("::").last() == Some(short_name)
801 && let Some(url) = self.get_url_for_id(*id)
802 {
803 return Some(url);
804 }
805 }
806
807 // Strategy 3: Use path name index
808 if let Some(ids) = self.path_name_index.get(short_name) {
809 for id in ids {
810 if let Some(url) = self.get_url_for_id(*id) {
811 return Some(url);
812 }
813 }
814 }
815
816 None
817 }
818
819 /// Get the URL for an ID (local or docs.rs).
820 fn get_url_for_id(&self, id: Id) -> Option<String> {
821 // Try local first
822 if let Some(path) = self.link_registry.get_path(id) {
823 let relative = LinkRegistry::compute_relative_path(self.current_file, path);
824 return Some(relative);
825 }
826
827 // Try docs.rs for external crates
828 if let Some(path_info) = self.krate.paths.get(&id)
829 && path_info.crate_id != 0
830 {
831 return Self::get_docs_rs_url(path_info);
832 }
833
834 None
835 }
836
837 /// Get docs.rs URL for an external crate item.
838 fn get_docs_rs_url(path_info: &rustdoc_types::ItemSummary) -> Option<String> {
839 let path = &path_info.path;
840 if path.is_empty() {
841 return None;
842 }
843
844 let crate_name = &path[0];
845
846 // Handle module URLs specially
847 if path_info.kind == ItemKind::Module {
848 if path.len() == 1 {
849 return Some(format!("https://docs.rs/{crate_name}/latest/{crate_name}/"));
850 }
851
852 let module_path = path[1..].join("/");
853
854 return Some(format!(
855 "https://docs.rs/{crate_name}/latest/{crate_name}/{module_path}/index.html"
856 ));
857 }
858
859 let item_path = path[1..].join("/");
860 let type_prefix = match path_info.kind {
861 ItemKind::Struct => "struct",
862 ItemKind::Enum => "enum",
863 ItemKind::Trait => "trait",
864 ItemKind::Function => "fn",
865 ItemKind::Constant => "constant",
866 ItemKind::TypeAlias => "type",
867 ItemKind::Macro => "macro",
868 _ => "index",
869 };
870
871 let item_name = path.last().unwrap_or(crate_name);
872
873 if item_path.is_empty() {
874 Some(format!("https://docs.rs/{crate_name}/latest/{crate_name}/"))
875 } else {
876 // Remove last segment from path for the directory
877 let dir_path = if path.len() > 2 {
878 path[1..path.len() - 1].join("/")
879 } else {
880 String::new()
881 };
882
883 if dir_path.is_empty() {
884 Some(format!(
885 "https://docs.rs/{crate_name}/latest/{crate_name}/{type_prefix}.{item_name}.html"
886 ))
887 } else {
888 Some(format!(
889 "https://docs.rs/{crate_name}/latest/{crate_name}/{dir_path}/{type_prefix}.{item_name}.html"
890 ))
891 }
892 }
893 }
894
895 /// Resolve a method link to a markdown link (without method anchor).
896 ///
897 /// Links to the type's page since methods don't have individual headings
898 /// in the generated markdown.
899 fn resolve_method_link(
900 &self,
901 type_name: &str,
902 method_name: &str,
903 item_links: &HashMap<String, Id>,
904 ) -> Option<String> {
905 // Try to find the type
906 let type_id = item_links.get(type_name).or_else(|| {
907 let short_type = type_name.split("::").last().unwrap_or(type_name);
908 item_links
909 .iter()
910 .find(|(k, _)| k.split("::").last() == Some(short_type))
911 .map(|(_, id)| id)
912 })?;
913
914 let type_path = self.link_registry.get_path(*type_id)?;
915 let relative = LinkRegistry::compute_relative_path(self.current_file, type_path);
916 let display = format!("{type_name}::{method_name}");
917
918 // Link to the type page without a method anchor (methods don't have headings)
919 Some(format!("[`{display}`]({relative})"))
920 }
921
922 /// Try to resolve link text to a markdown link.
923 fn resolve_link(&self, link_text: &str, item_links: &HashMap<String, Id>) -> String {
924 // Strategy 1: Exact match
925 if let Some(id) = item_links.get(link_text)
926 && let Some(md_link) = self.create_link_for_id(*id, link_text)
927 {
928 return md_link;
929 }
930
931 // Strategy 2: Short name match in item_links
932 let short_name = link_text.split("::").last().unwrap_or(link_text);
933
934 for (key, id) in item_links {
935 if key.split("::").last() == Some(short_name)
936 && let Some(md_link) = self.create_link_for_id(*id, short_name)
937 {
938 return md_link;
939 }
940 }
941
942 // Strategy 3: Use path name index
943 if let Some(ids) = self.path_name_index.get(short_name) {
944 for id in ids {
945 if let Some(md_link) = self.create_link_for_id(*id, short_name) {
946 return md_link;
947 }
948 }
949 }
950
951 // Fallback: return original
952 format!("[`{link_text}`]")
953 }
954
955 /// Create a markdown link for an ID.
956 fn create_link_for_id(&self, id: Id, display_name: &str) -> Option<String> {
957 // Try local link
958 if let Some(link) = self.link_registry.create_link(id, self.current_file) {
959 return Some(link);
960 }
961
962 if let Some(path) = self.link_registry.get_path(id) {
963 let relative = LinkRegistry::compute_relative_path(self.current_file, path);
964 let clean_name = display_name.split("::").last().unwrap_or(display_name);
965 return Some(format!("[`{clean_name}`]({relative})"));
966 }
967
968 // Try docs.rs for external crates
969 if let Some(path_info) = self.krate.paths.get(&id)
970 && path_info.crate_id != 0
971 {
972 return Self::create_docs_rs_link(path_info, display_name);
973 }
974
975 None
976 }
977
978 /// Create a docs.rs link for an external crate item.
979 fn create_docs_rs_link(
980 path_info: &rustdoc_types::ItemSummary,
981 display_name: &str,
982 ) -> Option<String> {
983 let url = Self::get_docs_rs_url(path_info)?;
984 let clean_name = display_name.split("::").last().unwrap_or(display_name);
985 Some(format!("[`{clean_name}`]({url})"))
986 }
987}
988
989// =============================================================================
990// Helper Functions
991// =============================================================================
992
993/// Replace regex matches using a closure.
994fn replace_with_regex<F>(text: &str, re: &Regex, replacer: F) -> String
995where
996 F: Fn(®ex::Captures<'_>) -> String,
997{
998 let mut result = String::with_capacity(text.len());
999 let mut last_end = 0;
1000
1001 for caps in re.captures_iter(text) {
1002 let m = caps.get(0).unwrap();
1003 result.push_str(&text[last_end..m.start()]);
1004 result.push_str(&replacer(&caps));
1005 last_end = m.end();
1006 }
1007
1008 result.push_str(&text[last_end..]);
1009 result
1010}
1011
1012/// Replace regex matches with access to the text after the match.
1013fn replace_with_regex_checked<F>(text: &str, re: &Regex, replacer: F) -> String
1014where
1015 F: Fn(®ex::Captures<'_>, &str) -> String,
1016{
1017 let mut result = String::with_capacity(text.len());
1018 let mut last_end = 0;
1019
1020 for caps in re.captures_iter(text) {
1021 let m = caps.get(0).unwrap();
1022 result.push_str(&text[last_end..m.start()]);
1023 let rest = &text[m.end()..];
1024 result.push_str(&replacer(&caps, rest));
1025 last_end = m.end();
1026 }
1027
1028 result.push_str(&text[last_end..]);
1029 result
1030}
1031
1032// =============================================================================
1033// Tests
1034// =============================================================================
1035
/// Unit tests for the free-standing doc-processing helpers reachable through
/// `super::*`.
#[cfg(test)]
mod tests {
    use super::*;

    /// HTML-style links: type-level targets become anchors, method-level
    /// targets are dropped.
    #[test]
    fn test_convert_html_links() {
        // Type-level links get anchors
        let type_level = convert_html_links("See (enum.Foo.html) for details");
        assert_eq!(type_level, "See (#foo) for details");

        // Method-level links are removed (methods don't have anchors)
        let method_level = convert_html_links("Call (struct.Bar.html#method.new)");
        assert_eq!(method_level, "Call ");
    }

    /// A leading heading that repeats the crate name is stripped; any other
    /// heading is left in place.
    #[test]
    fn test_strip_duplicate_title() {
        // Each row is (docs, crate name, expected output).
        let cases = [
            // Heading equals the crate name
            (
                "# my_crate\n\nThis is the description.",
                "my_crate",
                "This is the description.",
            ),
            // Different title - keep it
            (
                "# Introduction\n\nThis is the description.",
                "my_crate",
                "# Introduction\n\nThis is the description.",
            ),
            // Backticks around title (e.g., # `clap_builder`)
            (
                "# `clap_builder`\n\nBuilder implementation.",
                "clap_builder",
                "Builder implementation.",
            ),
            // Spaced title (e.g., # Serde JSON -> serde_json)
            (
                "# Serde JSON\n\nJSON serialization.",
                "serde_json",
                "JSON serialization.",
            ),
            // Hyphenated name
            ("# my-crate\n\nDescription.", "my_crate", "Description."),
        ];

        for (docs, crate_name, expected) in cases {
            assert_eq!(strip_duplicate_title(docs, crate_name), expected);
        }
    }

    /// Reference definition lines are removed while the prose that uses the
    /// references is kept.
    #[test]
    fn test_strip_reference_definitions() {
        // Backtick-style reference definitions
        let backtick = strip_reference_definitions("See [`Foo`] for details.\n\n[`Foo`]: crate::Foo");
        assert!(backtick.contains("See [`Foo`]"));
        assert!(!backtick.contains("[`Foo`]: crate::Foo"));

        // Plain reference definitions (no backticks)
        let plain = strip_reference_definitions("Use [value] here.\n\n[value]: crate::value::Value");
        assert!(plain.contains("Use [value]"));
        assert!(!plain.contains("[value]: crate::value::Value"));

        // Reference definitions with anchors
        let anchored = strip_reference_definitions(
            "See [from_str](#from-str) docs.\n\n[from_str](#from-str): crate::de::from_str",
        );
        assert!(anchored.contains("See [from_str](#from-str)"));
        assert!(!anchored.contains("[from_str](#from-str): crate::de::from_str"));

        // Multiple reference definitions
        let several =
            strip_reference_definitions("Content.\n\n[a]: path::a\n[b]: path::b\n[`c`]: path::c");
        assert_eq!(several.trim(), "Content.");
    }

    /// Path reference links collapse to their display text.
    #[test]
    fn test_convert_path_reference_links() {
        // Path references become inline code (can't create valid anchors without context)
        let converted = convert_path_reference_links("[`Tracker`][crate::style::Tracker] is useful");
        assert_eq!(converted, "`Tracker` is useful");
    }

    /// Hidden-line prefixes inside a code block are removed.
    #[test]
    fn test_unhide_code_lines_strips_hidden_prefix() {
        let input = "```\n# #[cfg(feature = \"test\")]\n# {\nuse foo::bar;\n# }\n```";
        let expected = "```rust\n#[cfg(feature = \"test\")]\n{\nuse foo::bar;\n}\n```";
        assert_eq!(unhide_code_lines(input), expected);
    }

    /// A fence without a language tag gets `rust`.
    #[test]
    fn test_unhide_code_lines_adds_rust_to_bare_fence() {
        let input = "```\nlet x = 1;\n```";
        assert_eq!(unhide_code_lines(input), "```rust\nlet x = 1;\n```");
    }

    /// A fence that already names a language is left untouched.
    #[test]
    fn test_unhide_code_lines_preserves_existing_language() {
        let input = "```python\nprint('hello')\n```";
        assert_eq!(unhide_code_lines(input), "```python\nprint('hello')\n```");
    }

    /// Tilde fences are treated like backtick fences.
    #[test]
    fn test_unhide_code_lines_handles_tilde_fence() {
        let input = "~~~\ncode\n~~~";
        assert_eq!(unhide_code_lines(input), "~~~rust\ncode\n~~~");
    }

    /// A line consisting only of `#` turns into an empty line.
    #[test]
    fn test_unhide_code_lines_lone_hash() {
        // A lone # becomes an empty line
        let input = "```\n#\nlet x = 1;\n```";
        assert_eq!(unhide_code_lines(input), "```rust\n\nlet x = 1;\n```");
    }
}