ndg_commonmark/processor/
extensions.rs

1//! Feature-specific Markdown processing extensions.
2use std::{fmt::Write, fs, path::Path};
3
4use html_escape::{encode_double_quoted_attribute, encode_text};
5
6use super::{dom::safe_select, process::process_safe};
7
/// Build the HTML ID for an option name, mirroring the XML ID scheme used by
/// nixos-render-docs.
///
/// Each of `*`, `<`, `>`, `[`, `]`, `:`, `"` and the space character is
/// replaced with `_`. Every other character, dots included, is kept as-is.
/// The result is prefixed with `option-`.
fn sanitize_option_id(name: &str) -> String {
  const REPLACED: [char; 8] = ['*', '<', '>', '[', ']', ':', '"', ' '];

  let mut id = String::with_capacity("option-".len() + name.len());
  id.push_str("option-");
  id.extend(
    name
      .chars()
      .map(|ch| if REPLACED.contains(&ch) { '_' } else { ch }),
  );
  id
}
25
/// Run GitHub Flavored Markdown (GFM) preprocessing over `markdown`.
///
/// At the moment this is a pass-through hook: GFM features are enabled through
/// comrak options, so the input is returned unchanged. Extra handling (custom
/// tables, task lists, ...) can be added here if comrak's behaviour ever falls
/// short of what is needed.
///
/// # Arguments
/// * `markdown` - The input markdown text
///
/// # Returns
/// An owned copy of the input markdown text
#[cfg(feature = "gfm")]
#[must_use]
pub fn apply_gfm_extensions(markdown: &str) -> String {
  // XXX: comrak already implements GFM. Anything in the spec that it does not
  // handle the way we want can be patched in here.
  String::from(markdown)
}
45
/// Maximum recursion depth for file includes to prevent infinite recursion.
///
/// `process_file_includes` returns an error as soon as its `depth` argument
/// reaches this value.
const MAX_INCLUDE_DEPTH: usize = 8;

/// Internal sentinel inserted between included files before block processing.
///
/// `read_includes` appends it on its own line after every inlined file, and
/// `process_file_includes` drops input lines that consist only of this marker.
const INCLUDE_BOUNDARY_MARKER: &str = "<!-- ndg:include-boundary -->";
51
/// Check whether `path` is safe to use as an include target.
///
/// A path is accepted only when it is relative and cannot climb out of the
/// directory it is later joined onto. It is rejected when it:
///
/// * contains a backslash,
/// * contains a parent-directory (`..`) component, or
/// * is absolute or starts with a Windows prefix. [`Path::join`] replaces the
///   base directory with such a path, so accepting it would let an include
///   read arbitrary files.
///
/// The empty path is accepted; callers filter out blank lines themselves.
/// `_base_dir` is currently unused.
#[cfg(feature = "nixpkgs")]
fn is_safe_path(path: &str, _base_dir: &Path) -> bool {
  use std::path::Component;

  if path.contains('\\') {
    return false;
  }

  // Only plain names and `.` are allowed; `..`, a root and a prefix are not.
  Path::new(path).components().all(|component| {
    matches!(component, Component::Normal(_) | Component::CurDir)
  })
}
69
/// Arguments found on the opening line of an `{=include=}` fence.
///
/// Produced by `parse_include_directive`.
#[cfg(feature = "nixpkgs")]
struct IncludeDirective {
  /// Value of the `html:into-file=` argument, if present.
  custom_output:  Option<String>,
  /// First argument that is neither `html:into-file=` nor `auto-id-prefix=`
  /// (for example `options`), if any.
  include_type:   Option<String>,
  /// Value of the `auto-id-prefix=` argument, if present.
  auto_id_prefix: Option<String>,
}
77
/// Parse the arguments on the opening line of an `{=include=}` fence.
///
/// The include type is the first whitespace-separated argument after the
/// fence marker that is not a `html:into-file=` or `auto-id-prefix=` argument.
/// The values of those two arguments are looked up in the whole line via
/// `directive_value`.
#[cfg(feature = "nixpkgs")]
fn parse_include_directive(line: &str) -> IncludeDirective {
  const OUTPUT_KEY: &str = "html:into-file=";
  const PREFIX_KEY: &str = "auto-id-prefix=";

  let arguments = match line.strip_prefix("```{=include=}") {
    Some(rest) => rest,
    None => line,
  }
  .trim();

  let mut include_type = None;
  for token in arguments.split_whitespace() {
    if token.starts_with(OUTPUT_KEY) || token.starts_with(PREFIX_KEY) {
      continue;
    }
    include_type = Some(token.to_string());
    break;
  }

  IncludeDirective {
    custom_output: directive_value(line, OUTPUT_KEY),
    include_type,
    auto_id_prefix: directive_value(line, PREFIX_KEY),
  }
}
98
/// Extract the value that follows `marker` in an include directive line.
///
/// The value starts right after the first occurrence of `marker` and runs up
/// to the next whitespace character or the end of the line. Stopping at any
/// whitespace, not only at an ASCII space, keeps tab-separated arguments from
/// being glued into one value.
///
/// Returns `None` when `marker` does not occur in `line`. A marker with
/// nothing after it yields `Some("")`.
#[cfg(feature = "nixpkgs")]
fn directive_value(line: &str, marker: &str) -> Option<String> {
  let start = line.find(marker)? + marker.len();
  let rest = &line[start..];
  let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
  Some(rest[..end].to_string())
}
109
/// Append generated `{#prefix-N.M}` IDs to the headings in `content`.
///
/// Lines that the fence tracker reports as being inside a code block are
/// copied verbatim. Every other line is offered to `add_auto_id_to_heading`
/// and copied unchanged when that returns `None`. Each output line is
/// terminated with `\n`. An empty `prefix` returns `content` unchanged.
#[cfg(feature = "nixpkgs")]
fn apply_auto_id_prefix(content: &str, prefix: &str) -> String {
  if prefix.is_empty() {
    return content.to_string();
  }

  let mut counters: Vec<usize> = Vec::new();
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut output = String::with_capacity(content.len());

  for line in content.lines() {
    fences = fences.process_line(line);

    let rewritten = if fences.in_code_block() {
      None
    } else {
      add_auto_id_to_heading(line, prefix, &mut counters)
    };

    output.push_str(rewritten.as_deref().unwrap_or(line));
    output.push('\n');
  }

  output
}
136
/// Append an auto-generated ID to `line` if it is an ATX heading.
///
/// A heading is a line with at most three bytes of leading whitespace, one to
/// six `#` characters, then whitespace and a non-empty title.
///
/// `heading_numbers` holds one counter per heading level seen so far. It is
/// resized to the heading's level and the last counter is incremented, so a
/// new heading resets the counters of all deeper levels. The counters are
/// updated even when the heading already carries an explicit `{#...}` ID.
///
/// Returns `None` when the line is not a heading or already has an ID,
/// otherwise the line followed by ` {#<prefix>-<n1>.<n2>...}`.
#[cfg(feature = "nixpkgs")]
fn add_auto_id_to_heading(
  line: &str,
  prefix: &str,
  heading_numbers: &mut Vec<usize>,
) -> Option<String> {
  let body = line.trim_start();
  let indent = line.len() - body.len();
  if indent > 3 {
    return None;
  }

  let level = body.bytes().take_while(|&byte| byte == b'#').count();
  if level == 0 || level > 6 {
    return None;
  }

  // The hashes must be followed by whitespace (or nothing at all).
  let after_hashes = &body[level..];
  if let Some(first) = after_hashes.chars().next() {
    if !first.is_whitespace() {
      return None;
    }
  }

  let title = after_hashes.trim();
  if title.is_empty() {
    return None;
  }

  // Grow or shrink the counter stack to this level, then bump its counter.
  if heading_numbers.len() < level {
    heading_numbers.resize(level, 0);
  } else {
    heading_numbers.truncate(level);
  }
  heading_numbers[level - 1] += 1;

  if title.contains("{#") {
    return None;
  }

  let mut id = String::new();
  for (position, number) in heading_numbers.iter().enumerate() {
    if position > 0 {
      id.push('.');
    }
    id.push_str(&number.to_string());
  }

  Some(format!("{line} {{#{prefix}-{id}}}"))
}
189
/// Render an options JSON document as HTML option blocks.
///
/// `content` must be a JSON object that maps option names to objects. For
/// every option a `<div class="option">` is emitted with a self-linking
/// heading, the `type` field (when it is a string) and the `description`
/// (when it is a non-empty string or a `literalMD` object with a `text`
/// field). All text is HTML-escaped; descriptions are not rendered as
/// markdown.
///
/// Returns `None` when `content` is not valid JSON, is not an object, or any
/// option value is not an object.
#[cfg(feature = "nixpkgs")]
fn render_options_include(content: &str) -> Option<String> {
  let parsed: serde_json::Value = serde_json::from_str(content).ok()?;
  let mut html = String::new();

  for (name, value) in parsed.as_object()? {
    let fields = value.as_object()?;
    let anchor = sanitize_option_id(name);

    let _ = writeln!(
      html,
      "<div class=\"option\" id=\"{}\">",
      encode_double_quoted_attribute(&anchor)
    );
    let _ = writeln!(
      html,
      "  <h3 class=\"option-name\"><a href=\"#{}\" \
       class=\"option-anchor\">{}</a></h3>",
      encode_double_quoted_attribute(&anchor),
      encode_text(name)
    );

    let type_name = fields.get("type").and_then(serde_json::Value::as_str);
    if let Some(type_name) = type_name {
      let _ = writeln!(
        html,
        "  <div class=\"option-type\">Type: <code>{}</code></div>",
        encode_text(type_name)
      );
    }

    // A description is either a plain string or `{ _type: "literalMD", text }`.
    let description = fields.get("description").map_or("", |raw| {
      match raw {
        serde_json::Value::String(text) => text.as_str(),
        serde_json::Value::Object(map) => {
          let kind = map.get("_type").and_then(serde_json::Value::as_str);
          if kind == Some("literalMD") {
            map
              .get("text")
              .and_then(serde_json::Value::as_str)
              .unwrap_or("")
          } else {
            ""
          }
        },
        _ => "",
      }
    });

    if !description.is_empty() {
      let _ = writeln!(
        html,
        "  <div class=\"option-description\">{}</div>",
        encode_text(description)
      );
    }

    html.push_str("</div>\n");
  }

  Some(html)
}
246
/// Render the body of an `{=include=} options` block.
///
/// When the listing is a `key: value` block with a `source` key, only that
/// file is rendered. Otherwise every non-blank line is treated as a path
/// relative to `base_dir` and rendered through `read_options_file`, which
/// skips unsafe paths, emits an HTML comment for unreadable or unparsable
/// files, and records every file it could read in `included_files`.
#[cfg(feature = "nixpkgs")]
fn read_options_includes(
  listing: &str,
  base_dir: &Path,
  included_files: &mut Vec<crate::types::IncludedFile>,
) -> String {
  if let Some(source) = parse_options_source(listing) {
    return read_options_file(&source, base_dir, included_files);
  }

  let mut rendered = String::new();
  let paths = listing
    .lines()
    .map(str::trim)
    .filter(|path| !path.is_empty());

  for path in paths {
    rendered.push_str(&read_options_file(path, base_dir, included_files));
  }

  rendered
}
294
/// Extract the `source` value from a `key: value` options listing.
///
/// Blank lines are ignored. If any other line has no `:` the listing is not a
/// key/value block (it is taken to be a plain list of paths) and `None` is
/// returned. When `source` appears more than once the last value wins.
///
/// Ignoring blank lines matters: without it a single empty line inside an
/// otherwise valid key/value block would make every `key: value` line be
/// treated as a file path.
#[cfg(feature = "nixpkgs")]
fn parse_options_source(listing: &str) -> Option<String> {
  let mut source = None;
  for line in listing.lines().filter(|line| !line.trim().is_empty()) {
    let (key, value) = line.split_once(':')?;
    if key.trim() == "source" {
      source = Some(value.trim().to_string());
    }
  }
  source
}
306
/// Read one options JSON file and render it as HTML.
///
/// `source` is resolved relative to `base_dir`. An unsafe path yields an empty
/// string. A file that cannot be read yields an HTML comment naming the file.
/// A file that can be read is recorded in `included_files`; if its content
/// cannot be rendered, an HTML comment is returned in place of the option
/// blocks.
#[cfg(feature = "nixpkgs")]
fn read_options_file(
  source: &str,
  base_dir: &Path,
  included_files: &mut Vec<crate::types::IncludedFile>,
) -> String {
  if !is_safe_path(source, base_dir) {
    return String::new();
  }

  let full_path = base_dir.join(source);
  let Ok(json) = fs::read_to_string(&full_path) else {
    return format!(
      "<!-- ndg: could not include file: {} -->\n",
      full_path.display()
    );
  };

  let rendered = render_options_include(&json).unwrap_or_else(|| {
    format!(
      "<!-- ndg: could not parse options include: {} -->\n",
      full_path.display()
    )
  });

  included_files.push(crate::types::IncludedFile {
    path:          source.to_string(),
    custom_output: None,
  });

  rendered
}
346
/// Read the files listed in an include block and expand them recursively.
///
/// Every non-blank, safe line of `listing` is resolved relative to `base_dir`
/// and run through `process_file_includes` with `depth + 1`. When
/// `auto_id_prefix` is set, headings of the file on listing line `n`
/// (1-based) receive IDs prefixed with `<prefix>-<n>`.
///
/// The processed content is appended to the returned string, followed by the
/// include boundary marker, unless `custom_output` is set. In that case the
/// file is only recorded in `included_files`. Nested includes are recorded
/// with paths relative to `base_dir`; those outside of it are dropped. A file
/// that cannot be read produces an HTML comment.
///
/// # Errors
///
/// Propagates the error of `process_file_includes` (recursion limit).
#[cfg(feature = "nixpkgs")]
#[expect(
  clippy::needless_pass_by_value,
  reason = "Owned value needed for cloning in loop"
)]
fn read_includes(
  listing: &str,
  base_dir: &Path,
  custom_output: Option<String>,
  auto_id_prefix: Option<String>,
  included_files: &mut Vec<crate::types::IncludedFile>,
  depth: usize,
) -> Result<String, String> {
  let mut output = String::new();

  for (index, entry) in listing.lines().enumerate() {
    let relative = entry.trim();
    if relative.is_empty() || !is_safe_path(relative, base_dir) {
      continue;
    }

    let full_path = base_dir.join(relative);
    log::info!("Including file: {}", full_path.display());

    let Ok(raw) = fs::read_to_string(&full_path) else {
      let _ = writeln!(
        output,
        "<!-- ndg: could not include file: {} -->",
        full_path.display()
      );
      continue;
    };

    // Nested includes are resolved relative to the including file.
    let file_dir = full_path.parent().unwrap_or(base_dir);
    let (mut body, nested_includes) =
      process_file_includes(&raw, file_dir, depth + 1)?;

    if let Some(prefix) = auto_id_prefix.as_deref() {
      body =
        apply_auto_id_prefix(&body, &format!("{}-{}", prefix, index + 1));
    }

    if custom_output.is_none() {
      output.push_str(&body);
      if !body.ends_with('\n') {
        output.push('\n');
      }
      output.push_str(INCLUDE_BOUNDARY_MARKER);
      output.push('\n');
    }

    included_files.push(crate::types::IncludedFile {
      path:          relative.to_string(),
      custom_output: custom_output.clone(),
    });

    // Report nested includes relative to the original base directory.
    for nested in nested_includes {
      let joined = file_dir.join(&nested.path);
      let Ok(normalized) = joined.strip_prefix(base_dir) else {
        continue;
      };
      included_files.push(crate::types::IncludedFile {
        path:          normalized.to_string_lossy().to_string(),
        custom_output: nested.custom_output,
      });
    }
  }

  Ok(output)
}
423
/// Expand `{=include=}` blocks in Nixpkgs/NixOS documentation.
///
/// An include block lists one file per line:
///
/// ````markdown
/// ```{=include=}
/// path/to/file1.md
/// path/to/file2.md
/// ```
/// ````
///
/// Blocks whose include type is `options` are rendered as option listings;
/// all other blocks are replaced by the recursively processed content of the
/// listed files. Include markers inside fenced code blocks are left alone,
/// and input lines that consist only of the internal include boundary marker
/// are dropped.
///
/// # Arguments
///
/// * `markdown` - The input markdown text
/// * `base_dir` - The base directory for resolving relative file paths
/// * `depth` - Current recursion depth (use 0 for initial call)
///
/// # Returns
///
/// Returns `Ok((processed_markdown, included_files))` where `included_files`
/// lists every file that was successfully included.
///
/// # Errors
///
/// Returns `Err(message)` if `depth` reaches [`MAX_INCLUDE_DEPTH`], which
/// likely indicates a circular include cycle.
///
/// # Security
///
/// Include paths are checked with `is_safe_path` before they are read.
#[cfg(feature = "nixpkgs")]
pub fn process_file_includes(
  markdown: &str,
  base_dir: &std::path::Path,
  depth: usize,
) -> Result<(String, Vec<crate::types::IncludedFile>), String> {
  if depth >= MAX_INCLUDE_DEPTH {
    return Err(format!(
      "Maximum include recursion depth ({MAX_INCLUDE_DEPTH}) exceeded. This \
       likely indicates a cycle in file includes."
    ));
  }

  let mut included: Vec<crate::types::IncludedFile> = Vec::new();
  let mut rendered = String::new();
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut remaining = markdown.lines();

  while let Some(line) = remaining.next() {
    if line.trim() == INCLUDE_BOUNDARY_MARKER {
      continue;
    }

    let fence_line = line.trim_start();
    let opens_include =
      !fences.in_code_block() && fence_line.starts_with("```{=include=}");

    if !opens_include {
      // Ordinary line: keep the fence state current and copy it through.
      fences = fences.process_line(line);
      rendered.push_str(line);
      rendered.push('\n');
      continue;
    }

    let directive = parse_include_directive(fence_line);

    // Gather the listing; `take_while` also consumes the closing fence.
    let mut listing = String::new();
    let entries = remaining
      .by_ref()
      .take_while(|entry| !entry.trim_start().starts_with("```"));
    for entry in entries {
      listing.push_str(entry);
      listing.push('\n');
    }

    let expansion = match directive.include_type.as_deref() {
      Some("options") => {
        read_options_includes(&listing, base_dir, &mut included)
      },
      _ => {
        read_includes(
          &listing,
          base_dir,
          directive.custom_output,
          directive.auto_id_prefix,
          &mut included,
          depth,
        )?
      },
    };
    rendered.push_str(&expansion);
  }

  Ok((rendered, included))
}
521
/// Process role markup in markdown content.
///
/// Roles look like ``{command}`ls -la` `` and are replaced by the HTML that
/// [`format_role_markup`] produces. Anything the inline tracker reports as
/// code (backtick or tilde fences, inline code) is copied through untouched.
///
/// # Arguments
///
/// * `content` - The markdown content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
/// * `auto_link_options` - Whether to convert {option} roles to links
/// * `valid_options` - Optional set of valid option names for validation
///
/// # Returns
///
/// The processed markdown with role markup converted to HTML
#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
#[must_use]
#[expect(
  clippy::implicit_hasher,
  reason = "Standard HashMap/HashSet sufficient for this use case"
)]
pub fn process_role_markup(
  content: &str,
  manpage_urls: Option<&rustc_hash::FxHashMap<String, String>>,
  auto_link_options: bool,
  valid_options: Option<&rustc_hash::FxHashSet<String>>,
) -> String {
  let mut result = String::with_capacity(content.len());
  let mut chars = content.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();

  while let Some(ch) = chars.next() {
    // Handle backticks (code fences and inline code)
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;

      // Add all the backticks
      result.push_str(&"`".repeat(tick_count));
      continue;
    }

    // Handle tilde code fences (~~~)
    if ch == '~' && chars.peek() == Some(&'~') {
      let (new_tracker, tilde_count) = tracker.process_tildes(&mut chars);
      tracker = new_tracker;

      result.push_str(&"~".repeat(tilde_count));
      continue;
    }

    // Handle newlines
    if ch == '\n' {
      tracker = tracker.process_newline();
      result.push(ch);
      continue;
    }

    // Process role markup only if we're not in any kind of code
    if ch == '{' && !tracker.in_any_code() {
      // Parse on a clone of the iterator and adopt it only on success. This
      // advances by exactly the characters the parser consumed. Counting
      // consumed *bytes* instead would skip too much after non-ASCII role
      // content, and copying the rest of the input on every `{` is wasteful.
      let mut lookahead = chars.clone();
      if let Some(role_markup) = parse_role_markup(
        &mut lookahead,
        manpage_urls,
        auto_link_options,
        valid_options,
      ) {
        chars = lookahead;
        result.push_str(&role_markup);
        continue;
      }
    }

    // Not role markup (or inside code): keep the original character
    result.push(ch);
  }

  result
}
610
611/// Parse a role markup from the character iterator.
612///
613/// # Returns
614///
615/// `Some(html)` if a valid role markup is found, `None` otherwise.
616fn parse_role_markup(
617  chars: &mut std::iter::Peekable<std::str::Chars>,
618  manpage_urls: Option<&rustc_hash::FxHashMap<String, String>>,
619  auto_link_options: bool,
620  valid_options: Option<&rustc_hash::FxHashSet<String>>,
621) -> Option<String> {
622  let mut role_name = String::new();
623
624  // Parse role name (lowercase letters only)
625  while let Some(&ch) = chars.peek() {
626    if ch.is_ascii_lowercase() {
627      role_name.push(ch);
628      chars.next();
629    } else {
630      break;
631    }
632  }
633
634  // Must have a non-empty role name
635  if role_name.is_empty() {
636    return None;
637  }
638
639  // Expect closing brace
640  if chars.peek() != Some(&'}') {
641    return None;
642  }
643  chars.next(); // consume '}'
644
645  // Expect opening backtick
646  if chars.peek() != Some(&'`') {
647    return None;
648  }
649  chars.next(); // consume '`'
650
651  // Parse content until closing backtick
652  let mut content = String::new();
653  for ch in chars.by_ref() {
654    if ch == '`' {
655      // Found closing backtick, validate content
656      // Most role types should not have empty content
657      if content.is_empty() && !matches!(role_name.as_str(), "manpage") {
658        return None; // reject empty content for most roles
659      }
660      return Some(format_role_markup(
661        &role_name,
662        &content,
663        manpage_urls,
664        auto_link_options,
665        valid_options,
666      ));
667    }
668    content.push(ch);
669  }
670
671  // No closing backtick found
672  None
673}
674
675/// Format the role markup as HTML based on the role type and content.
676#[must_use]
677#[expect(
678  clippy::option_if_let_else,
679  reason = "Nested options clearer with if-let"
680)]
681#[expect(
682  clippy::implicit_hasher,
683  reason = "Standard HashMap/HashSet sufficient for this use case"
684)]
685pub fn format_role_markup(
686  role_type: &str,
687  content: &str,
688  manpage_urls: Option<&rustc_hash::FxHashMap<String, String>>,
689  auto_link_options: bool,
690  valid_options: Option<&rustc_hash::FxHashSet<String>>,
691) -> String {
692  let escaped_content = encode_text(content);
693  match role_type {
694    "manpage" => {
695      if let Some(urls) = manpage_urls {
696        if let Some(url) = urls.get(content) {
697          format!(
698            "<a href=\"{url}\" \
699             class=\"manpage-reference\">{escaped_content}</a>"
700          )
701        } else {
702          format!("<span class=\"manpage-reference\">{escaped_content}</span>")
703        }
704      } else {
705        format!("<span class=\"manpage-reference\">{escaped_content}</span>")
706      }
707    },
708    "command" => format!("<code class=\"command\">{escaped_content}</code>"),
709    "env" => format!("<code class=\"env-var\">{escaped_content}</code>"),
710    "file" => format!("<code class=\"file-path\">{escaped_content}</code>"),
711    "option" => {
712      if cfg!(feature = "ndg-flavored") && auto_link_options {
713        // Check if validation is enabled and option is valid
714        let should_link =
715          valid_options.is_none_or(|opts| opts.contains(content)); // If no validation set, link all options
716
717        if should_link {
718          let option_id = sanitize_option_id(content);
719          format!(
720            "<a class=\"option-reference\" \
721             href=\"options.html#{option_id}\"><code \
722             class=\"nixos-option\">{escaped_content}</code></a>"
723          )
724        } else {
725          format!("<code class=\"nixos-option\">{escaped_content}</code>")
726        }
727      } else {
728        format!("<code class=\"nixos-option\">{escaped_content}</code>")
729      }
730    },
731    "var" => format!("<code class=\"nix-var\">{escaped_content}</code>"),
732    _ => format!("<span class=\"{role_type}-markup\">{escaped_content}</span>"),
733  }
734}
735
736/// Process MyST-style autolinks in markdown content.
737///
738/// Converts MyST-like autolinks supported by Nixpkgs-flavored commonmark:
739/// - `[](#anchor)` -> `[](#anchor) -> {{ANCHOR}}` (placeholder for comrak)
740/// - `[](https://url)` -> `<https://url>` (converted to standard autolink)
741///
742/// # Arguments
743///
744/// * `content` - The markdown content to process
745///
746/// # Returns
747///
748/// The processed markdown with `MyST` autolinks converted as a [`String`]
749#[must_use]
750pub fn process_myst_autolinks(content: &str) -> String {
751  let mut result = String::with_capacity(content.len());
752  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
753
754  for line in content.lines() {
755    // Update fence tracking state
756    fence_tracker = fence_tracker.process_line(line);
757
758    // Only process MyST autolinks if we're not in a code block
759    if fence_tracker.in_code_block() {
760      result.push_str(line);
761    } else {
762      result.push_str(&process_line_myst_autolinks(line));
763    }
764    result.push('\n');
765  }
766
767  result
768}
769
770/// Process `MyST` autolinks in a single line.
771fn process_line_myst_autolinks(line: &str) -> String {
772  let mut result = String::with_capacity(line.len());
773  let mut chars = line.chars().peekable();
774  let mut tracker = crate::utils::codeblock::InlineTracker::new();
775
776  while let Some(ch) = chars.next() {
777    if ch == '`' {
778      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
779      tracker = new_tracker;
780      result.push_str(&"`".repeat(tick_count));
781      continue;
782    }
783
784    if ch == '[' && chars.peek() == Some(&']') && !tracker.in_any_code() {
785      chars.next(); // consume ']'
786
787      // Check if this is []{#...} syntax (inline anchor, not autolink)
788      // Nice pit, would be a shame if someone was to... fall into it.
789      if chars.peek() == Some(&'{') {
790        // This is inline anchor syntax, not autolink, keep as-is
791        result.push_str("[]");
792        continue;
793      }
794
795      if chars.peek() == Some(&'(') {
796        chars.next(); // consume '('
797
798        // Collect URL until ')'
799        let mut url = String::new();
800        let mut found_closing = false;
801        while let Some(&next_ch) = chars.peek() {
802          if next_ch == ')' {
803            chars.next(); // consume ')'
804            found_closing = true;
805            break;
806          }
807          url.push(next_ch);
808          chars.next();
809        }
810
811        if found_closing && !url.is_empty() {
812          // Check if it's an anchor link (starts with #) or a URL
813          if url.starts_with('#') {
814            // Add placeholder text for comrak to parse it as a link
815            let _ = write!(result, "[{{{{ANCHOR}}}}]({url})");
816          } else if url.starts_with("http://") || url.starts_with("https://") {
817            // Convert URL autolinks to standard <url> format
818            let _ = write!(result, "<{url}>");
819          } else {
820            // Keep other patterns as-is
821            let _ = write!(result, "[]({url})");
822          }
823        } else {
824          // Malformed, put back what we consumed
825          result.push_str("](");
826          result.push_str(&url);
827        }
828      } else {
829        // Not a link, put back consumed character
830        result.push(']');
831      }
832    } else {
833      result.push(ch);
834    }
835  }
836
837  result
838}
839
/// Process inline anchors in markdown content.
///
/// This function processes inline anchor syntax like `[]{#my-anchor}` while
/// being code-block aware to avoid processing inside code fences.
///
/// A list item that starts with an anchor (`- []{#id} text`,
/// `1. []{#id} text`), at any indentation, is handled by
/// `process_list_item_anchor`. All other lines, and list items whose anchor
/// is rejected there, go through `process_line_anchors`. Every output line is
/// terminated with `\n`.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed markdown with inline anchors converted to HTML spans
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_inline_anchors(content: &str) -> String {
  let mut result = String::with_capacity(content.len() + 100);
  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();

  for line in content.lines() {
    // Update fence tracking state
    fence_tracker = fence_tracker.process_line(line);

    if fence_tracker.in_code_block() {
      // In code block, keep line as-is
      result.push_str(line);
    } else {
      let trimmed = line.trim_start();
      let indent = line.len() - trimmed.len();

      // `find_list_item_anchor` reports an offset into `trimmed`. Shift it by
      // the indentation so that it addresses the same byte in `line`.
      let list_item = find_list_item_anchor(trimmed)
        .and_then(|offset| process_list_item_anchor(line, indent + offset));

      match list_item {
        Some(processed_line) => result.push_str(&processed_line),
        // Process regular inline anchors in the line
        None => result.push_str(&process_line_anchors(line)),
      }
    }
    result.push('\n');
  }

  result
}
888
/// Convert Pandoc/CommonMark bracketed spans
/// (`[text]{#id .class key=value}`) line by line.
///
/// Lines inside fenced code blocks are copied through unchanged. All other
/// lines are rewritten by `process_line_bracketed_spans`. Every output line
/// is terminated with `\n`.
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_bracketed_spans(content: &str) -> String {
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut output = String::with_capacity(content.len());

  for line in content.lines() {
    fences = fences.process_line(line);

    let inside_code = fences.in_code_block();
    if inside_code {
      output += line;
    } else {
      output += &process_line_bracketed_spans(line);
    }
    output += "\n";
  }

  output
}
908
/// Convert the bracketed spans in a single line to HTML.
///
/// A `[` starts a span candidate unless it directly follows `!` (image
/// syntax) or sits inside inline code. Candidates that
/// `parse_bracketed_span` rejects are copied through unchanged.
#[cfg(feature = "nixpkgs")]
fn process_line_bracketed_spans(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();
  let mut previous = None;

  while let Some(ch) = chars.next() {
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;
      result.push_str(&"`".repeat(tick_count));
      previous = Some('`');
      continue;
    }

    if ch == '[' && previous != Some('!') && !tracker.in_any_code() {
      let remaining: String = chars.clone().collect();
      if let Some((html, consumed_bytes)) = parse_bracketed_span(&remaining) {
        // The parser reports how many *bytes* it consumed, but the iterator
        // advances by *characters*. Convert first, otherwise non-ASCII span
        // text would swallow the characters that follow the span.
        let consumed_chars = remaining
          .get(..consumed_bytes)
          .map_or(0, |span| span.chars().count());
        for _ in 0..consumed_chars {
          chars.next();
        }
        result.push_str(&html);
        previous = Some('>');
        continue;
      }
    }

    result.push(ch);
    previous = Some(ch);
  }

  result
}
943
/// Parse a bracketed span from the text that follows an opening `[`.
///
/// Expects `text]{attrs}` at the start of `input`, with non-empty `text` and
/// attributes that `render_span_attrs` accepts. The text is HTML-escaped.
///
/// Returns the rendered `<span>` together with the number of bytes of `input`
/// that make up the span, or `None` if `input` does not start with one.
#[cfg(feature = "nixpkgs")]
fn parse_bracketed_span(input: &str) -> Option<(String, usize)> {
  let (text, rest) = input.split_once(']')?;
  if text.is_empty() {
    return None;
  }

  let attr_block = rest.strip_prefix('{')?;
  let (attrs, _) = attr_block.split_once('}')?;
  let html_attrs = render_span_attrs(attrs)?;

  // text + `]` + `{` + attrs + `}`
  let consumed = text.len() + 1 + 1 + attrs.len() + 1;
  let html = format!("<span{html_attrs}>{}</span>", encode_text(text));
  Some((html, consumed))
}
961
962#[cfg(feature = "nixpkgs")]
963fn render_span_attrs(attrs: &str) -> Option<String> {
964  let mut id = None;
965  let mut classes = Vec::new();
966  let mut pairs = Vec::new();
967
968  for attr in attrs.split_whitespace() {
969    if let Some(value) = attr.strip_prefix('#') {
970      if !value.is_empty() {
971        id = Some(value);
972      }
973    } else if let Some(value) = attr.strip_prefix('.') {
974      if !value.is_empty() {
975        classes.push(value);
976      }
977    } else if let Some((key, value)) = attr.split_once('=')
978      && key
979        .chars()
980        .all(|ch| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_')
981    {
982      pairs.push((key, value.trim_matches('"')));
983    }
984  }
985
986  if id.is_none() && classes.is_empty() && pairs.is_empty() {
987    return None;
988  }
989
990  let mut rendered = String::new();
991  if let Some(id) = id {
992    let _ = write!(rendered, " id=\"{}\"", encode_double_quoted_attribute(id));
993  }
994  if !classes.is_empty() {
995    let _ = write!(
996      rendered,
997      " class=\"{}\"",
998      encode_double_quoted_attribute(&classes.join(" "))
999    );
1000  }
1001  for (key, value) in pairs {
1002    let _ = write!(
1003      rendered,
1004      " {key}=\"{}\"",
1005      encode_double_quoted_attribute(value)
1006    );
1007  }
1008
1009  Some(rendered)
1010}
1011
/// Detect a list item whose content starts with an inline anchor.
///
/// `trimmed` is a line without its leading whitespace. Recognised forms are a
/// bullet (`-`, `*` or `+`) or an ordered marker (ASCII digits followed by
/// `.`), then exactly one space, then `[]{#`.
///
/// Returns the byte offset in `trimmed` at which the anchor starts, or `None`
/// when the line is not such a list item.
fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
  const ANCHOR_OPEN: &str = "[]{#";

  // Unordered list: "- []{#id}", "* []{#id}" or "+ []{#id}"
  let is_bullet = ["- ", "* ", "+ "]
    .iter()
    .any(|marker| trimmed.starts_with(marker));
  if is_bullet && trimmed[2..].starts_with(ANCHOR_OPEN) {
    return Some(2);
  }

  // Ordered list: "1. []{#id}" or "123. []{#id}"
  let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
  if digits == 0 {
    return None;
  }

  let after_dot = trimmed[digits..].strip_prefix('.')?;
  after_dot.starts_with(" []{#").then_some(digits + 2)
}
1043
/// Replace the `[]{#id}` anchor of a list item with an anchor `<span>`.
///
/// # Arguments
///
/// * `line` - The list item line
/// * `anchor_start` - Byte offset in `line` at which `[]{#` is expected
///
/// # Returns
///
/// The line with the anchor rewritten to
/// `<span id="id" class="nixos-anchor"></span>`, keeping everything before and
/// after it. `None` is returned when:
///
/// - `anchor_start` is out of range or not on a character boundary,
/// - the text at `anchor_start` does not start with `[]{#`,
/// - the closing `}` is missing, or
/// - the ID is empty or contains characters other than ASCII alphanumerics,
///   `-` and `_`.
fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
  // Checked slicing: a bad offset yields `None` instead of a panic.
  let before_anchor = line.get(..anchor_start)?;
  let after_marker = line.get(anchor_start..)?;

  let after_open = after_marker.strip_prefix("[]{#")?;
  let (id, remaining_content) = after_open.split_once('}')?;

  // Only a restricted character set is allowed, which also makes the ID safe
  // to interpolate into the attribute without escaping.
  let is_valid_id = !id.is_empty()
    && id
      .chars()
      .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');

  is_valid_id.then(|| {
    format!(
      "{before_anchor}<span id=\"{id}\" \
       class=\"nixos-anchor\"></span>{remaining_content}"
    )
  })
}
1073
1074/// Process inline anchors in a single line.
1075#[expect(
1076  clippy::excessive_nesting,
1077  reason = "complex character parsing algorithm, refactoring would reduce \
1078            readability"
1079)]
1080fn process_line_anchors(line: &str) -> String {
1081  let mut result = String::with_capacity(line.len());
1082  let mut chars = line.chars().peekable();
1083  let mut tracker = crate::utils::codeblock::InlineTracker::new();
1084
1085  while let Some(ch) = chars.next() {
1086    if ch == '`' {
1087      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
1088      tracker = new_tracker;
1089      result.push_str(&"`".repeat(tick_count));
1090      continue;
1091    }
1092
1093    if ch == '[' && chars.peek() == Some(&']') && !tracker.in_any_code() {
1094      chars.next(); // consume ']'
1095
1096      // Check for {#id} pattern
1097      if chars.peek() == Some(&'{') {
1098        chars.next(); // consume '{'
1099        if chars.peek() == Some(&'#') {
1100          chars.next(); // consume '#'
1101
1102          // Collect the ID
1103          let mut id = String::new();
1104          while let Some(&next_ch) = chars.peek() {
1105            if next_ch == '}' {
1106              chars.next(); // consume '}'
1107
1108              // Validate ID and create span
1109              if !id.is_empty()
1110                && id
1111                  .chars()
1112                  .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
1113              {
1114                let _ = write!(
1115                  result,
1116                  "<span id=\"{id}\" class=\"nixos-anchor\"></span>"
1117                );
1118              } else {
1119                // Invalid ID, put back original text
1120                let _ = write!(result, "[]{{{{#{id}}}}}");
1121              }
1122              break;
1123            } else if next_ch.is_ascii_alphanumeric()
1124              || next_ch == '-'
1125              || next_ch == '_'
1126            {
1127              id.push(next_ch);
1128              chars.next();
1129            } else {
1130              // Invalid character, put back original text
1131              let _ = write!(result, "[]{{{{#{id}");
1132              break;
1133            }
1134          }
1135        } else {
1136          // Not an anchor, put back consumed characters
1137          result.push_str("]{");
1138        }
1139      } else {
1140        // Not an anchor, put back consumed character
1141        result.push(']');
1142      }
1143    } else {
1144      result.push(ch);
1145    }
1146  }
1147
1148  result
1149}
1150
/// Convert block-level extensions in markdown content to HTML.
///
/// The content is walked line by line. Outside of fenced code blocks the
/// following constructs are rendered, in this order of precedence:
///
/// 1. GitHub-style callouts (`> [!TYPE]`)
/// 2. Fenced admonitions (`::: {.type #id}`), whose body is processed
///    recursively and whose output is re-indented to match the opening fence
/// 3. Figures (`::: {.figure #id}`)
///
/// Include boundary markers are dropped; every other line is passed through
/// unchanged.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed markdown, with lines joined by `\n`
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_block_elements(content: &str) -> String {
  let mut output: Vec<String> = Vec::new();
  let mut remaining = content.lines().peekable();
  let mut fences = crate::utils::codeblock::FenceTracker::new();

  while let Some(line) = remaining.next() {
    // Boundary markers are internal sentinels and never reach the output.
    if line.trim() == INCLUDE_BOUNDARY_MARKER {
      continue;
    }

    fences = fences.process_line(line);

    // Lines belonging to a code block are copied verbatim.
    if fences.in_code_block() {
      output.push(line.to_owned());
      continue;
    }

    // GitHub-style callout: > [!TYPE]
    if let Some((kind, first_line)) = parse_github_callout(line) {
      let body = collect_github_callout_content(&mut remaining, &first_line);
      output.push(render_admonition(&kind, None, &body));
      continue;
    }

    // Fenced admonition: ::: {.type}
    if let Some(start) = parse_fenced_admonition_start(line) {
      let indent = leading_whitespace(line);
      let (raw_body, trailing) =
        collect_fenced_content(&mut remaining, indent, start.fence_len);
      let body = process_block_elements(&raw_body);
      let html =
        render_admonition(&start.adm_type, start.id.as_deref(), &body);
      output.push(indent_block(&html, indent));
      // Text following the closing fence on the same line becomes its own
      // output line.
      output.extend(trailing);
      continue;
    }

    // Figure: ::: {.figure #id}
    // NOTE(review): `parse_fenced_admonition_start` falls back to the first
    // class when no known admonition type is present, so a `{.figure ...}`
    // line with a closing brace appears to be claimed by the branch above
    // before this one is reached - confirm whether that is intended.
    if let Some((id, title, body)) = parse_figure_block(line, &mut remaining) {
      output.push(render_figure(id.as_deref(), &title, &body));
      continue;
    }

    output.push(line.to_owned());
  }

  output.join("\n")
}
1231
/// Recognize the opening line of a GitHub-style callout: `> [!TYPE] content`.
///
/// Leading whitespace is ignored. `TYPE` must be one of the upper-case names
/// `NOTE`, `TIP`, `IMPORTANT`, `WARNING`, `CAUTION` or `DANGER`.
///
/// # Returns
///
/// The lower-cased callout type and the trimmed text following the closing
/// `]`, or `None` when the line is not a recognized callout opener.
fn parse_github_callout(line: &str) -> Option<(String, String)> {
  let after_marker = line.trim_start().strip_prefix("> [!")?;
  let (kind, rest) = after_marker.split_once(']')?;

  let is_known_kind = matches!(
    kind,
    "NOTE" | "TIP" | "IMPORTANT" | "WARNING" | "CAUTION" | "DANGER"
  );

  is_known_kind.then(|| (kind.to_lowercase(), rest.trim().to_string()))
}
1257
/// Check if a line starts with a valid ATX header (1-6 '#' followed by
/// whitespace or EOL).
///
/// Per `CommonMark` spec, an ATX header requires 1-6 '#' characters followed by
/// either:
///
/// - A whitespace character (space, tab, etc.)
/// - End of line (the string ends)
///
/// Leading whitespace is not skipped; callers are expected to pass an already
/// trimmed line.
///
/// # Arguments
///
/// * `line` - The line to check
///
/// # Returns
///
/// `true` if the line starts with a valid ATX header marker
fn is_atx_header(line: &str) -> bool {
  // '#' is a single byte in UTF-8, so the count is also a valid byte offset.
  let hash_count = line.bytes().take_while(|&byte| byte == b'#').count();
  if !(1..=6).contains(&hash_count) {
    return false;
  }

  // The marker must be followed by whitespace or by nothing at all. A single
  // trailing non-whitespace character (as in "#a") is not a header.
  match line[hash_count..].chars().next() {
    None => true,
    Some(next) => next.is_whitespace(),
  }
}
1295
1296/// Collect content for GitHub-style callouts
1297fn collect_github_callout_content(
1298  lines: &mut std::iter::Peekable<std::str::Lines>,
1299  initial_content: &str,
1300) -> String {
1301  let mut content = String::new();
1302
1303  if !initial_content.is_empty() {
1304    content.push_str(initial_content);
1305    content.push('\n');
1306  }
1307
1308  while let Some(line) = lines.peek() {
1309    let trimmed = line.trim_start();
1310
1311    // Empty line ends the blockquote
1312    if trimmed.is_empty() {
1313      break;
1314    }
1315
1316    // Check if this is a continuation line with `>`
1317    let content_part = if trimmed.starts_with('>') {
1318      trimmed.strip_prefix('>').unwrap_or("").trim_start()
1319    } else {
1320      // Check if this line starts a new block element that cannot be
1321      // lazy-continued ATX headers, setext header underlines, code
1322      // fences, and thematic breaks
1323      let starts_new_block = is_atx_header(trimmed)
1324        || trimmed.starts_with("```")
1325        || trimmed.starts_with("~~~")
1326        || (trimmed.starts_with("---")
1327          && trimmed.chars().all(|c| c == '-' || c.is_whitespace()))
1328        || (trimmed.starts_with("===")
1329          && trimmed.chars().all(|c| c == '=' || c.is_whitespace()))
1330        || (trimmed.starts_with("***")
1331          && trimmed.chars().all(|c| c == '*' || c.is_whitespace()));
1332
1333      if starts_new_block {
1334        break;
1335      }
1336
1337      // Lazy continuation
1338      // Mind you, "lazy" doesn't refer to me being lazy but the GFM feature for
1339      // a line without `>` that continues the blockquote
1340      // paragraph
1341      trimmed
1342    };
1343
1344    content.push_str(content_part);
1345    content.push('\n');
1346    lines.next(); // consume the line
1347  }
1348
1349  content.trim().to_string()
1350}
1351
/// Parsed opening line of a fenced admonition such as `::: {.type #id}`.
struct AdmonitionStart {
  /// Lower-cased admonition type taken from the classes of the attribute
  /// list.
  adm_type:  String,
  /// Element ID taken from the first non-empty `#id` token, if any.
  id:        Option<String>,
  /// Number of `:` characters that make up the opening fence.
  fence_len: usize,
}

/// Parse fenced admonition start: `::: {.type #id}`
///
/// The line must consist of at least three colons followed by a brace
/// delimited attribute list. Classes and the ID may appear in either order:
/// `{.warning #id}` and `{#id .warning}` are equivalent. Empty tokens (a lone
/// `.` or `#`) are ignored.
///
/// The admonition type is the first class that names a known type (`note`,
/// `tip`, `important`, `warning`, `caution`, `danger`, compared
/// case-insensitively); when there is none, the first class is used instead.
///
/// # Returns
///
/// `None` when the line is not a fence, has no closed attribute list, or the
/// list contains no class.
fn parse_fenced_admonition_start(line: &str) -> Option<AdmonitionStart> {
  let trimmed = line.trim();

  let fence_len = trimmed.chars().take_while(|&ch| ch == ':').count();
  if fence_len < 3 {
    return None;
  }

  // ':' is a single byte, so `fence_len` is also a valid byte offset.
  let after_open = trimmed[fence_len..].trim_start().strip_prefix('{')?;
  let (attrs, _) = after_open.split_once('}')?;

  let mut known_type = None;
  let mut fallback_class = None;
  let mut id = None;
  for part in attrs.split_whitespace() {
    if let Some(class) = part.strip_prefix('.') {
      if class.is_empty() {
        continue;
      }
      let class = class.to_ascii_lowercase();
      let is_known = matches!(
        class.as_str(),
        "note" | "tip" | "important" | "warning" | "caution" | "danger"
      );
      // The fallback is only consulted when no known type exists, in which
      // case the first unknown class is also the first class overall.
      let slot = if is_known {
        &mut known_type
      } else {
        &mut fallback_class
      };
      if slot.is_none() {
        *slot = Some(class);
      }
    } else if let Some(value) = part.strip_prefix('#') {
      if !value.is_empty() && id.is_none() {
        id = Some(value.to_string());
      }
    }
  }

  let adm_type = known_type.or(fallback_class)?;
  Some(AdmonitionStart {
    adm_type,
    id,
    fence_len,
  })
}
1410
/// Return the run of whitespace characters at the start of `line`.
///
/// The result is empty when the line starts with a non-whitespace character
/// and is the whole line when it contains nothing but whitespace.
fn leading_whitespace(line: &str) -> &str {
  let indent_len = line.len() - line.trim_start().len();
  &line[..indent_len]
}
1418
/// Remove `indent` from the start of `line`.
///
/// Lines that do not begin with exactly `indent` are returned unchanged.
fn strip_indent<'a>(line: &'a str, indent: &str) -> &'a str {
  let Some(dedented) = line.strip_prefix(indent) else {
    return line;
  };
  dedented
}
1422
/// Prefix every line of `block` with `indent`.
///
/// With an empty `indent` the block is returned unchanged. Otherwise the
/// lines are re-joined with `\n`, so a trailing newline of `block` is not
/// preserved, and blank lines receive the indent as well.
fn indent_block(block: &str, indent: &str) -> String {
  if indent.is_empty() {
    return block.to_string();
  }

  let mut indented = String::with_capacity(block.len());
  for (index, line) in block.lines().enumerate() {
    if index > 0 {
      indented.push('\n');
    }
    indented.push_str(indent);
    indented.push_str(line);
  }
  indented
}
1434
1435/// Collect content until closing :::
1436///
1437/// # Returns
1438///
1439/// Tuple of (`admonition_content`, `trailing_content`). If there's content
1440/// after the closing `:::` on the same line, it's returned as
1441/// `trailing_content`.
1442fn collect_fenced_content(
1443  lines: &mut std::iter::Peekable<std::str::Lines>,
1444  indent: &str,
1445  fence_len: usize,
1446) -> (String, Option<String>) {
1447  let mut content = String::new();
1448
1449  for line in lines.by_ref() {
1450    let line = strip_indent(line, indent);
1451    let trimmed = line.trim();
1452    if trimmed == INCLUDE_BOUNDARY_MARKER {
1453      return (content.trim().to_string(), None);
1454    }
1455
1456    let closing_len = trimmed.chars().take_while(|&ch| ch == ':').count();
1457    if closing_len >= fence_len {
1458      // check if there's content after the closing :::
1459      let after_colons = &trimmed[closing_len..];
1460      if !after_colons.is_empty() {
1461        // there's trailing content on the same line as the closing delimiter
1462        return (content.trim().to_string(), Some(after_colons.to_string()));
1463      }
1464      break;
1465    }
1466    content.push_str(line);
1467    content.push('\n');
1468  }
1469
1470  (content.trim().to_string(), None)
1471}
1472
1473/// Parse figure block: ::: {.figure #id}
1474#[expect(
1475  clippy::option_if_let_else,
1476  reason = "Nested options clearer with if-let"
1477)]
1478fn parse_figure_block(
1479  line: &str,
1480  lines: &mut std::iter::Peekable<std::str::Lines>,
1481) -> Option<(Option<String>, String, String)> {
1482  let trimmed = line.trim();
1483  if !trimmed.starts_with(":::") {
1484    return None;
1485  }
1486
1487  let after_colons = trimmed[3..].trim_start();
1488  if !after_colons.starts_with("{.figure") {
1489    return None;
1490  }
1491
1492  // Extract ID if present
1493  let id = if let Some(hash_pos) = after_colons.find('#') {
1494    if let Some(close_brace) = after_colons.find('}') {
1495      if hash_pos < close_brace {
1496        Some(after_colons[hash_pos + 1..close_brace].trim().to_string())
1497      } else {
1498        None
1499      }
1500    } else {
1501      None
1502    }
1503  } else {
1504    None
1505  };
1506
1507  // Get title from next line (should start with #)
1508  let title = if let Some(title_line) = lines.next() {
1509    let trimmed_title = title_line.trim();
1510    if let Some(this) = trimmed_title.strip_prefix('#') {
1511      { this.trim_matches(char::is_whitespace) }.to_string()
1512    } else {
1513      // Put the line back if it's not a title
1514      return None;
1515    }
1516  } else {
1517    return None;
1518  };
1519
1520  // Collect figure content
1521  let mut content = String::new();
1522  for line in lines.by_ref() {
1523    let trimmed = line.trim();
1524    if trimmed == INCLUDE_BOUNDARY_MARKER || trimmed.starts_with(":::") {
1525      break;
1526    }
1527    content.push_str(line);
1528    content.push('\n');
1529  }
1530
1531  Some((id, title, content.trim().to_string()))
1532}
1533
1534/// Render an admonition as HTML
1535fn render_admonition(
1536  adm_type: &str,
1537  id: Option<&str>,
1538  content: &str,
1539) -> String {
1540  let capitalized_type = crate::utils::capitalize_first(adm_type);
1541  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
1542
1543  let opening = format!(
1544    "<div class=\"admonition {adm_type}\"{id_attr}>\n<p \
1545     class=\"admonition-title\">{capitalized_type}</p>"
1546  );
1547  format!("{opening}\n\n{content}\n\n</div>\n")
1548}
1549
1550/// Render a figure as HTML
1551fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
1552  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
1553
1554  format!(
1555    "<figure{id_attr}>\n<figcaption>{title}</figcaption>\n{content}\n</figure>"
1556  )
1557}
1558
/// Turn manpage references in HTML content into links.
///
/// Every `<span class="manpage-reference">` whose text content is a key of
/// `manpage_urls` is replaced by an `<a class="manpage-reference">` with the
/// same text, pointing at the mapped URL. Map values may be plain URLs or
/// `<a href="...">` snippets, from which the `href` is extracted. Spans
/// without a matching entry are left as they are.
///
/// The document is parsed and re-serialized even when no mapping is given.
///
/// # Arguments
///
/// * `html` - The HTML content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
///
/// # Returns
///
/// The processed HTML with manpage references converted to links
#[cfg(feature = "nixpkgs")]
#[must_use]
#[expect(
  clippy::implicit_hasher,
  reason = "Standard HashMap sufficient for this use case"
)]
pub fn process_manpage_references(
  html: &str,
  manpage_urls: Option<&rustc_hash::FxHashMap<String, String>>,
) -> String {
  process_safe(
    html,
    |html| {
      use kuchikikiki::{Attribute, ExpandedName, NodeRef};
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);

      // Replacements are gathered first and applied once the selection has
      // been fully walked.
      let mut replacements = Vec::new();

      for span in safe_select(&document, "span.manpage-reference") {
        let name = span.text_contents();

        // Only references with a known URL become links.
        let Some(url) = manpage_urls.and_then(|urls| urls.get(&name)) else {
          continue;
        };

        let href_attr = (ExpandedName::new("", "href"), Attribute {
          prefix: None,
          value:  extract_url_from_html(url).into(),
        });
        let class_attr = (ExpandedName::new("", "class"), Attribute {
          prefix: None,
          value:  "manpage-reference".into(),
        });

        let anchor = NodeRef::new_element(
          markup5ever::QualName::new(
            None,
            markup5ever::ns!(html),
            markup5ever::local_name!("a"),
          ),
          vec![href_attr, class_attr],
        );
        anchor.append(NodeRef::new_text(name));
        replacements.push((span.clone(), anchor));
      }

      // Swap each span for its link.
      for (span, anchor) in replacements {
        span.insert_before(anchor);
        span.detach();
      }

      let mut serialized = Vec::new();
      let _ = document.serialize(&mut serialized);
      String::from_utf8(serialized).unwrap_or_else(|_| html.to_string())
    },
    // Fallback argument for `process_safe`.
    // NOTE(review): the previous comment here said the original HTML is
    // returned on error, yet an empty string is passed - confirm against
    // `process_safe`.
    "",
  )
}
1642
1643/// Process option references
1644/// Converts {option} role markup into links to the options page.
1645///
1646/// This processes `<code>` elements that have the `nixos-option` class, i.e.,
1647/// {option} role markup and convert them into links to the options page.
1648///
1649/// # Arguments
1650///
1651/// * `html` - The HTML string to process.
1652/// * `valid_options` - Optional set of valid option names for validation.
1653///
1654/// # Returns
1655///
1656/// The HTML string with option references rewritten as links.
1657#[cfg(feature = "ndg-flavored")]
1658#[must_use]
1659#[expect(
1660  clippy::implicit_hasher,
1661  reason = "Standard HashSet sufficient for this use case"
1662)]
1663pub fn process_option_references(
1664  html: &str,
1665  valid_options: Option<&rustc_hash::FxHashSet<String>>,
1666) -> String {
1667  use kuchikikiki::{Attribute, ExpandedName, NodeRef};
1668  use markup5ever::{QualName, local_name, ns};
1669  use tendril::TendrilSink;
1670
1671  process_safe(
1672    html,
1673    |html| {
1674      let document = kuchikikiki::parse_html().one(html);
1675
1676      let mut to_replace = vec![];
1677
1678      // Only process code elements that already have the nixos-option class
1679      // from {option} role syntax
1680      for code_node in safe_select(&document, "code.nixos-option") {
1681        let code_el = code_node;
1682        let code_text = code_el.text_contents();
1683
1684        // Skip if already wrapped in an option-reference link
1685        let mut is_already_option_ref = false;
1686        let mut current = code_el.parent();
1687        while let Some(parent) = current {
1688          if let Some(element) = parent.as_element()
1689            && element.name.local == local_name!("a")
1690            && let Some(class_attr) =
1691              element.attributes.borrow().get(local_name!("class"))
1692            && class_attr.contains("option-reference")
1693          {
1694            is_already_option_ref = true;
1695            break;
1696          }
1697          current = parent.parent();
1698        }
1699
1700        if !is_already_option_ref {
1701          // Check if validation is enabled and option is valid. If no
1702          // validation set, link all options
1703          let should_link =
1704            valid_options.is_none_or(|opts| opts.contains(code_text.as_str()));
1705
1706          if should_link {
1707            let option_id = sanitize_option_id(code_text.as_str());
1708            let attrs = vec![
1709              (ExpandedName::new("", "href"), Attribute {
1710                prefix: None,
1711                value:  format!("options.html#{option_id}"),
1712              }),
1713              (ExpandedName::new("", "class"), Attribute {
1714                prefix: None,
1715                value:  "option-reference".into(),
1716              }),
1717            ];
1718            let a = NodeRef::new_element(
1719              QualName::new(None, ns!(html), local_name!("a")),
1720              attrs,
1721            );
1722            let code = NodeRef::new_element(
1723              QualName::new(None, ns!(html), local_name!("code")),
1724              vec![],
1725            );
1726            code.append(NodeRef::new_text(code_text.clone()));
1727            a.append(code);
1728            to_replace.push((code_el.clone(), a));
1729          }
1730          // If should_link is false, leave the code element as-is (no wrapping)
1731        }
1732      }
1733
1734      for (old, new) in to_replace {
1735        old.insert_before(new);
1736        old.detach();
1737      }
1738
1739      let mut out = Vec::new();
1740      let _ = document.serialize(&mut out);
1741      String::from_utf8(out).unwrap_or_else(|_| html.to_string())
1742    },
1743    // Return original HTML on error
1744    "",
1745  )
1746}
1747
/// Pull the URL out of an `<a href="...">` snippet.
///
/// When the input starts with `<a href="`, the text up to the next `"` is
/// returned. Any other input, including a snippet whose `href` value is never
/// closed, is returned unchanged.
fn extract_url_from_html(url_or_html: &str) -> &str {
  url_or_html
    .strip_prefix("<a href=\"")
    .and_then(|after_quote| after_quote.split_once('"'))
    .map_or(url_or_html, |(url, _)| url)
}
1765
/// Convert wikilinks and Obsidian-style links in markdown content to HTML
/// anchors.
///
/// Converts:
///
/// - `[[page]]` (Obsidian link) -> `<a href="page.html" class="obsidian-link">`
/// - `[[name|url]]` (Wiki link) -> `<a href="url" class="wikilink">`
///
/// Lines that the fence tracker reports as part of a fenced code block are
/// copied verbatim.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed markdown, with lines joined by `\n` and trailing whitespace
/// removed
#[cfg(feature = "wiki")]
#[must_use]
pub fn process_wikilinks(content: &str) -> String {
  use crate::utils::codeblock::FenceTracker;

  let mut fences = FenceTracker::new();
  let mut converted: Vec<String> = Vec::new();

  for line in content.lines() {
    fences = fences.process_line(line);

    converted.push(if fences.in_code_block() {
      line.to_owned()
    } else {
      process_line_wikilinks(line)
    });
  }

  converted.join("\n").trim_end().to_string()
}
1804
/// Process wikilinks in a single line.
///
/// - `[[name|url]]` becomes `<a href="url" class="wikilink">name</a>`; the
///   split happens at the first `|` and both parts are trimmed.
/// - `[[page]]` becomes `<a href="page.html" class="obsidian-link">page</a>`.
/// - `[[]]` and an opening `[[` without a matching `]]` are copied unchanged.
///
/// Link text is escaped as HTML text and link targets are escaped for use
/// inside a double-quoted attribute, so neither can break out of the
/// generated markup.
#[cfg(feature = "wiki")]
fn process_line_wikilinks(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    if !(ch == '[' && chars.peek() == Some(&'[')) {
      result.push(ch);
      continue;
    }
    chars.next(); // consume the second '['

    // Gather everything up to the closing "]]".
    let mut inner = String::new();
    let mut found_double_close = false;
    while let Some(next_ch) = chars.next() {
      if next_ch == ']' && chars.peek() == Some(&']') {
        chars.next();
        found_double_close = true;
        break;
      }
      inner.push(next_ch);
    }

    if !found_double_close {
      // Unterminated link: put the consumed text back.
      result.push_str("[[");
      result.push_str(&inner);
      continue;
    }

    if inner.is_empty() {
      result.push_str("[[]]");
      continue;
    }

    // Writing into a `String` cannot fail, so the results are discarded.
    if let Some((name, url)) = inner.split_once('|') {
      let _ = write!(
        result,
        "<a href=\"{}\" class=\"wikilink\">{}</a>",
        encode_double_quoted_attribute(url.trim()),
        encode_text(name.trim())
      );
    } else {
      let page = inner.trim();
      let link_target = format!("{page}.html");
      let _ = write!(
        result,
        "<a href=\"{}\" class=\"obsidian-link\">{}</a>",
        encode_double_quoted_attribute(&link_target),
        encode_text(page)
      );
    }
  }

  result
}
1862
#[cfg(test)]
mod tests {
  use super::*;

  /// Assert that `is_atx_header` accepts every line in `lines`.
  fn assert_all_headers(lines: &[&str]) {
    for line in lines {
      assert!(is_atx_header(line), "expected an ATX header: {line:?}");
    }
  }

  /// Assert that `is_atx_header` rejects every line in `lines`.
  fn assert_no_headers(lines: &[&str]) {
    for line in lines {
      assert!(!is_atx_header(line), "expected no ATX header: {line:?}");
    }
  }

  /// Assert that `haystack` contains every one of `needles`.
  #[cfg(feature = "wiki")]
  fn assert_contains_all(haystack: &str, needles: &[&str]) {
    for needle in needles {
      assert!(
        haystack.contains(needle),
        "expected {needle:?} in {haystack:?}"
      );
    }
  }

  #[test]
  fn test_is_atx_header_valid_headers() {
    // 1-6 hashes followed by a space
    assert_all_headers(&[
      "# Header",
      "## Header",
      "### Header",
      "#### Header",
      "##### Header",
      "###### Header",
    ]);

    // tab after the hashes
    assert_all_headers(&["#\tHeader", "##\tHeader"]);

    // hashes only, no content
    assert_all_headers(&["#", "##", "###", "####", "#####", "######"]);

    // several spaces after the hashes
    assert_all_headers(&["#  Header with multiple spaces", "##   Header"]);
  }

  #[test]
  fn test_is_atx_header_invalid_headers() {
    // more than 6 hashes
    assert_no_headers(&["####### Too many hashes", "######## Even more"]);

    // no space after the hashes
    assert_no_headers(&["#NoSpace", "##NoSpace"]);

    // hash in the middle, empty string, no hash at all
    assert_no_headers(&["Not # a header", "", "Regular text"]);

    // non-whitespace immediately after the hashes
    assert_no_headers(&["#hashtag", "##hashtag", "#123", "##abc"]);

    // special characters immediately after the hash
    assert_no_headers(&["#!important", "#@mention", "#$variable"]);
  }

  #[test]
  fn test_is_atx_header_edge_cases() {
    // leading whitespace is normally trimmed by the caller; untrimmed input
    // must be rejected
    assert_no_headers(&[" # Header", "  ## Header"]);

    // only spaces after the hashes
    assert_all_headers(&["#     ", "##    "]);

    // trailing newline after a valid header
    assert_all_headers(&["# Header\n", "## Header\n"]);

    // mixed whitespace after the hashes
    assert_all_headers(&["# \t  Header", "##  \tHeader"]);
  }

  #[test]
  fn test_is_atx_header_blockquote_context() {
    // the kind of input collect_github_callout_content passes after
    // trim_start()
    assert_all_headers(&["# New Section", "## Subsection"]);

    // non-headers that must not end a blockquote
    assert_no_headers(&["#tag", "##issue-123", "###no-space"]);

    // exactly 6 hashes is valid, 7 is not
    assert_all_headers(&["###### Level 6"]);
    assert_no_headers(&["####### Not valid"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_obsidian_basic() {
    let html = process_wikilinks("Check out [[Some Page]] for details.");
    assert_contains_all(&html, &[
      "href=\"Some Page.html\"",
      "class=\"obsidian-link\"",
      ">Some Page<",
    ]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_with_url() {
    let html = process_wikilinks("See [[Custom Name|https://example.com]]");
    assert_contains_all(&html, &[
      "href=\"https://example.com\"",
      "class=\"wikilink\"",
      ">Custom Name<",
    ]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_with_spaces() {
    let html = process_wikilinks("[[My Page Name]]");
    assert_contains_all(&html, &["href=\"My Page Name.html\""]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_in_code_block() {
    let html = process_wikilinks("```\n[[Wiki Link]]\n```\nThen [[Another]]");
    assert_contains_all(&html, &["[[Wiki Link]]", "href=\"Another.html\""]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_empty() {
    let html = process_wikilinks("[[]]");
    assert_contains_all(&html, &["[[]]"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_malformed() {
    let html = process_wikilinks("[[ incomplete");
    assert_contains_all(&html, &["[[ incomplete"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_html_escaping() {
    let html = process_wikilinks("See [[Page With <script>]] for info");
    assert_contains_all(&html, &["&lt;script&gt;"]);
    assert!(!html.contains(">Page With <script><"));
  }
}
ndg_commonmark/processor/extensions.rs

ndg_commonmark/processor/
extensions.rs