Skip to main content

ndg_commonmark/processor/
extensions.rs

1//! Feature-specific Markdown processing extensions.
2use std::{fmt::Write, fs, path::Path};
3
4use html_escape::{encode_double_quoted_attribute, encode_text};
5
6use super::{dom::safe_select, process::process_safe};
7
/// Build the HTML element ID for an option name, following the XML ID scheme
/// used by nixos-render-docs.
///
/// Each of `*`, `<`, `>`, `[`, `]`, `:`, `"` and the space character is
/// replaced by `_`; every other character (dots included) is kept verbatim.
/// The result is prefixed with `option-`.
fn sanitize_option_id(name: &str) -> String {
  const REPLACED: [char; 8] = ['*', '<', '>', '[', ']', ':', '"', ' '];

  let mut id = String::with_capacity("option-".len() + name.len());
  id.push_str("option-");
  for ch in name.chars() {
    id.push(if REPLACED.contains(&ch) { '_' } else { ch });
  }
  id
}
25
26/// Apply GitHub Flavored Markdown (GFM) extensions to the input markdown.
27///
28/// This is a placeholder for future GFM-specific preprocessing or AST
29/// transformations. In practice, most GFM features are enabled via comrak
30/// options, but additional logic (such as custom tables, task lists, etc.) can
31/// be added here.
32///
33/// # Arguments
34/// * `markdown` - The input markdown text
35///
36/// # Returns
37/// The processed markdown text with GFM extensions applied
38#[cfg(feature = "gfm")]
39#[must_use]
pub fn apply_gfm_extensions(markdown: &str) -> String {
  // Comrak handles GFM natively, so the input is passed through untouched.
  // Any GFM behaviour that needs adjusting beyond comrak's options would be
  // implemented here.
  String::from(markdown)
}
45
/// Maximum nesting depth for file includes. Include processing returns an
/// error instead of recursing once this depth is reached, which prevents
/// infinite recursion.
const MAX_INCLUDE_DEPTH: usize = 8;

/// Internal sentinel line appended after the content of each inlined include.
/// Lines equal to it (after trimming) are dropped again when they show up in
/// markdown that is run through include processing.
const INCLUDE_BOUNDARY_MARKER: &str = "<!-- ndg:include-boundary -->";
51
52/// Check if a path is safe for file inclusion (no parent directory traversal).
53#[cfg(feature = "nixpkgs")]
fn is_safe_path(path: &str, _base_dir: &Path) -> bool {
  // NOTE(review): only backslashes and `..` components are rejected here. An
  // absolute path passes this check, and `Path::join` lets an absolute path
  // replace the base directory - confirm whether that is intended.
  let has_backslash = path.contains('\\');
  let climbs_out = Path::new(path)
    .components()
    .any(|part| matches!(part, std::path::Component::ParentDir));

  !(has_backslash || climbs_out)
}
69
/// Arguments parsed from the opening line of an `{=include=}` fenced block.
#[cfg(feature = "nixpkgs")]
struct IncludeDirective {
  /// Value of the `html:into-file=` argument, if present.
  custom_output:  Option<String>,
  /// First argument that is neither an `html:into-file=` nor an
  /// `auto-id-prefix=` argument (for example `options`), if any.
  include_type:   Option<String>,
  /// Value of the `auto-id-prefix=` argument, if present.
  auto_id_prefix: Option<String>,
}
77
/// Parse the opening line of an `{=include=}` fenced block.
///
/// The include type is the first whitespace-separated argument after the
/// fence marker that is not an `html:into-file=` or `auto-id-prefix=`
/// argument. The values of those two arguments are looked up separately via
/// [`directive_value`].
#[cfg(feature = "nixpkgs")]
fn parse_include_directive(line: &str) -> IncludeDirective {
  const KEYED_ARGS: [&str; 2] = ["html:into-file=", "auto-id-prefix="];

  let args = match line.strip_prefix("```{=include=}") {
    Some(rest) => rest.trim(),
    None => line.trim(),
  };

  let mut include_type = None;
  for arg in args.split_whitespace() {
    if KEYED_ARGS.iter().any(|key| arg.starts_with(*key)) {
      continue;
    }
    include_type = Some(arg.to_string());
    break;
  }

  IncludeDirective {
    custom_output: directive_value(line, "html:into-file="),
    include_type,
    auto_id_prefix: directive_value(line, "auto-id-prefix="),
  }
}
98
99#[cfg(feature = "nixpkgs")]
/// Extract the value of a `marker`-prefixed argument (e.g. `html:into-file=`)
/// from an include directive line.
///
/// The value starts right after the first occurrence of `marker` and runs up
/// to the next whitespace character or the end of the line. Returns `None`
/// when `marker` does not occur in `line`.
fn directive_value(line: &str, marker: &str) -> Option<String> {
  let (_, rest) = line.split_once(marker)?;

  // Stop at any whitespace, not only an ASCII space: the directive's other
  // arguments are tokenised with `split_whitespace`, so a tab-separated
  // argument must not leak into this value.
  let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
  Some(rest[..end].to_string())
}
109
/// Append generated heading IDs, built from `prefix`, to the headings in
/// `content`.
///
/// An empty `prefix` returns the content unchanged. Otherwise lines inside
/// fenced code blocks, and lines that [`add_auto_id_to_heading`] declines to
/// rewrite, are copied as-is, and every output line is terminated with `\n`.
#[cfg(feature = "nixpkgs")]
fn apply_auto_id_prefix(content: &str, prefix: &str) -> String {
  if prefix.is_empty() {
    return content.to_string();
  }

  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut counters = Vec::new();
  let mut out = String::with_capacity(content.len());

  for line in content.lines() {
    fences = fences.process_line(line);

    // Headings are only numbered outside of fenced code blocks.
    let rewritten = if fences.in_code_block() {
      None
    } else {
      add_auto_id_to_heading(line, prefix, &mut counters)
    };

    match rewritten {
      Some(heading) => out.push_str(&heading),
      None => out.push_str(line),
    }
    out.push('\n');
  }

  out
}
136
137#[cfg(feature = "nixpkgs")]
/// Try to append a generated `{#prefix-a.b.c}` ID to an ATX heading line.
///
/// `heading_numbers` holds the running counter for each heading level. It is
/// updated for every recognised heading, including headings that already
/// contain a `{#` ID of their own.
///
/// Returns `None` (leave the line untouched) when the line is indented by
/// more than three bytes of whitespace, is not a level 1-6 ATX heading, has
/// no heading text, or already contains `{#`.
fn add_auto_id_to_heading(
  line: &str,
  prefix: &str,
  heading_numbers: &mut Vec<usize>,
) -> Option<String> {
  let body = line.trim_start();
  let indent = &line[..line.len() - body.len()];
  if indent.len() > 3 {
    return None;
  }

  // `#` is a single byte, so the byte difference is the heading level.
  let text = body.trim_start_matches('#');
  let level = body.len() - text.len();
  if level == 0 || level > 6 {
    return None;
  }

  // The hashes must be followed by whitespace; `#tag` is not a heading.
  match text.chars().next() {
    Some(first) if !first.is_whitespace() => return None,
    _ => {},
  }
  if text.trim().is_empty() {
    return None;
  }

  // Counters of deeper levels are dropped and skipped levels start at zero,
  // then this level's counter advances.
  heading_numbers.resize(level, 0);
  heading_numbers[level - 1] += 1;

  if text.contains("{#") {
    return None;
  }

  let mut id = String::new();
  for (position, number) in heading_numbers.iter().enumerate() {
    if position > 0 {
      id.push('.');
    }
    id.push_str(&number.to_string());
  }

  Some(format!("{indent}{body} {{#{prefix}-{id}}}"))
}
189
/// Render an options JSON document as HTML option blocks.
///
/// `content` must be a JSON object mapping option names to objects. Each
/// option becomes a `<div class="option">` holding its name, its `type` (when
/// that is a string) and its `description` (a plain string, or the `text` of
/// an object whose `_type` is `literalMD`). Interpolated values are
/// HTML-escaped.
///
/// Returns `None` when `content` is not valid JSON, the top-level value is
/// not an object, or any option value is not an object.
#[cfg(feature = "nixpkgs")]
fn render_options_include(content: &str) -> Option<String> {
  let parsed = serde_json::from_str::<serde_json::Value>(content).ok()?;
  let mut html = String::new();

  for (name, value) in parsed.as_object()? {
    let fields = value.as_object()?;
    let id = sanitize_option_id(name);
    let id_attr = encode_double_quoted_attribute(&id);

    let _ = writeln!(html, "<div class=\"option\" id=\"{id_attr}\">");
    let _ = writeln!(
      html,
      "  <h3 class=\"option-name\"><a href=\"#{id_attr}\" \
       class=\"option-anchor\">{}</a></h3>",
      encode_text(name)
    );

    let type_name = fields.get("type").and_then(serde_json::Value::as_str);
    if let Some(type_name) = type_name {
      let _ = writeln!(
        html,
        "  <div class=\"option-type\">Type: <code>{}</code></div>",
        encode_text(type_name)
      );
    }

    // Anything other than a string or a `literalMD` object counts as "no
    // description".
    let description = match fields.get("description") {
      Some(serde_json::Value::String(text)) => text.as_str(),
      Some(serde_json::Value::Object(markdown))
        if markdown.get("_type").and_then(serde_json::Value::as_str)
          == Some("literalMD") =>
      {
        markdown
          .get("text")
          .and_then(serde_json::Value::as_str)
          .unwrap_or("")
      },
      _ => "",
    };
    if !description.is_empty() {
      let _ = writeln!(
        html,
        "  <div class=\"option-description\">{}</div>",
        encode_text(description)
      );
    }

    html.push_str("</div>\n");
  }

  Some(html)
}
246
/// Render the listing of an `options` include block.
///
/// When the listing consists of `key: value` lines with a `source` key, that
/// single file is rendered. Otherwise each non-empty line is treated as the
/// path of an options JSON file relative to `base_dir`: unsafe paths are
/// skipped silently, unreadable or unparsable files leave an HTML comment in
/// the output, and every file that could be read is recorded in
/// `included_files`.
#[cfg(feature = "nixpkgs")]
fn read_options_includes(
  listing: &str,
  base_dir: &Path,
  included_files: &mut Vec<crate::types::IncludedFile>,
) -> String {
  match parse_options_source(listing) {
    Some(source) => read_options_file(&source, base_dir, included_files),
    None => {
      let mut html = String::new();
      // Every listed path gets exactly the single-file treatment; blank
      // lines are dropped first because they do not name a file.
      for entry in listing.lines().map(str::trim) {
        if entry.is_empty() {
          continue;
        }
        html.push_str(&read_options_file(entry, base_dir, included_files));
      }
      html
    },
  }
}
294
295#[cfg(feature = "nixpkgs")]
/// Look for a `source: <path>` entry in a `key: value` style options listing.
///
/// Blank lines are ignored. If any other line is not of the form
/// `key: value`, the listing is not a key/value listing and `None` is
/// returned, which lets the caller treat it as a plain list of paths. When
/// `source` appears more than once, the last occurrence wins.
fn parse_options_source(listing: &str) -> Option<String> {
  let mut source = None;
  for line in listing.lines() {
    // A stray blank line must not disqualify an otherwise valid key/value
    // listing.
    if line.trim().is_empty() {
      continue;
    }
    let (key, value) = line.split_once(':')?;
    if key.trim() == "source" {
      source = Some(value.trim().to_string());
    }
  }
  source
}
306
/// Read one options JSON file and render it as HTML.
///
/// `source` is resolved relative to `base_dir`. A path rejected by
/// [`is_safe_path`] yields an empty string. A file that cannot be read, or
/// whose content cannot be rendered, yields an HTML comment naming the file.
/// Every file that could be read is recorded in `included_files`, whether or
/// not it could be rendered.
#[cfg(feature = "nixpkgs")]
fn read_options_file(
  source: &str,
  base_dir: &Path,
  included_files: &mut Vec<crate::types::IncludedFile>,
) -> String {
  if !is_safe_path(source, base_dir) {
    return String::new();
  }

  let full_path = base_dir.join(source);
  let Ok(content) = fs::read_to_string(&full_path) else {
    return format!(
      "<!-- ndg: could not include file: {} -->\n",
      full_path.display()
    );
  };

  included_files.push(crate::types::IncludedFile {
    path:          source.to_string(),
    custom_output: None,
  });

  render_options_include(&content).unwrap_or_else(|| {
    format!(
      "<!-- ndg: could not parse options include: {} -->\n",
      full_path.display()
    )
  })
}
346
/// Inline the files listed in a regular (non-options) include block.
///
/// Each non-empty line of `listing` that passes [`is_safe_path`] is read
/// relative to `base_dir` and run through [`process_file_includes`] at
/// `depth + 1`. With `auto_id_prefix` set, the file's headings receive IDs
/// prefixed with `<prefix>-<n>`, where `n` is the 1-based position of the
/// line within the listing. Unless `custom_output` is set, the processed
/// content is appended to the result, followed by
/// [`INCLUDE_BOUNDARY_MARKER`]. Every file read, plus each nested include
/// whose path lies under `base_dir`, is pushed to `included_files`. A file
/// that cannot be read leaves an HTML comment in the result.
///
/// # Errors
///
/// Propagates the error returned by [`process_file_includes`].
#[cfg(feature = "nixpkgs")]
#[allow(
  clippy::needless_pass_by_value,
  reason = "Owned value needed for cloning in loop"
)]
fn read_includes(
  listing: &str,
  base_dir: &Path,
  custom_output: Option<String>,
  auto_id_prefix: Option<String>,
  included_files: &mut Vec<crate::types::IncludedFile>,
  depth: usize,
) -> Result<String, String> {
  let mut output = String::new();

  for (index, raw_line) in listing.lines().enumerate() {
    let entry = raw_line.trim();
    if entry.is_empty() || !is_safe_path(entry, base_dir) {
      continue;
    }

    let full_path = base_dir.join(entry);
    log::info!("Including file: {}", full_path.display());

    let Ok(content) = fs::read_to_string(&full_path) else {
      let _ = writeln!(
        output,
        "<!-- ndg: could not include file: {} -->",
        full_path.display()
      );
      continue;
    };

    // Nested includes are resolved relative to the included file itself.
    let file_dir = full_path.parent().unwrap_or(base_dir);
    let (mut body, nested_includes) =
      process_file_includes(&content, file_dir, depth + 1)?;

    if let Some(prefix) = auto_id_prefix.as_deref() {
      body = apply_auto_id_prefix(&body, &format!("{}-{}", prefix, index + 1));
    }

    // Files redirected to their own output are recorded but not inlined.
    if custom_output.is_none() {
      output.push_str(&body);
      if !body.ends_with('\n') {
        output.push('\n');
      }
      output.push_str(INCLUDE_BOUNDARY_MARKER);
      output.push('\n');
    }

    included_files.push(crate::types::IncludedFile {
      path:          entry.to_string(),
      custom_output: custom_output.clone(),
    });

    // Re-express nested include paths relative to the original `base_dir`;
    // entries that do not lie under it are dropped.
    for nested in nested_includes {
      let nested_full_path = file_dir.join(&nested.path);
      let Ok(relative) = nested_full_path.strip_prefix(base_dir) else {
        continue;
      };
      included_files.push(crate::types::IncludedFile {
        path:          relative.to_string_lossy().to_string(),
        custom_output: nested.custom_output,
      });
    }
  }

  Ok(output)
}
423
/// Process file includes in Nixpkgs/NixOS documentation.
///
/// This function expands include blocks of the following form by replacing
/// them with the content of the listed files:
///
/// ````markdown
/// ```{=include=}
/// path/to/file1.md
/// path/to/file2.md
/// ```
/// ````
///
/// Include blocks whose type argument is `options` are rendered as option
/// listings instead of being inlined as markdown. Include markers that appear
/// inside a fenced code block are left untouched.
///
/// # Arguments
///
/// * `markdown` - The input markdown text
/// * `base_dir` - The base directory for resolving relative file paths
/// * `depth` - Current recursion depth (use 0 for initial call)
///
/// # Returns
///
/// Returns `Ok((processed_markdown, included_files))` where `included_files`
/// is a list of all successfully included files.
///
/// # Errors
///
/// Returns `Err(message)` if recursion depth reaches [`MAX_INCLUDE_DEPTH`],
/// which likely indicates a circular include cycle.
///
/// # Security
///
/// Listed paths that contain `..` components or backslashes are skipped.
/// NOTE(review): the path check does not reject absolute paths - confirm
/// whether includes are meant to be restricted to relative paths.
#[cfg(feature = "nixpkgs")]
pub fn process_file_includes(
  markdown: &str,
  base_dir: &std::path::Path,
  depth: usize,
) -> Result<(String, Vec<crate::types::IncludedFile>), String> {
  // Check recursion depth limit
  if depth >= MAX_INCLUDE_DEPTH {
    return Err(format!(
      "Maximum include recursion depth ({MAX_INCLUDE_DEPTH}) exceeded. This \
       likely indicates a cycle in file includes."
    ));
  }

  let mut output = String::new();
  let mut lines = markdown.lines();
  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
  let mut all_included_files: Vec<crate::types::IncludedFile> = Vec::new();

  while let Some(line) = lines.next() {
    // Boundary sentinels present in the input are dropped; they are not
    // copied to the output and do not affect fence tracking.
    if line.trim() == INCLUDE_BOUNDARY_MARKER {
      continue;
    }

    let trimmed = line.trim_start();

    if !fence_tracker.in_code_block() && trimmed.starts_with("```{=include=}") {
      let directive = parse_include_directive(trimmed);

      // Collect the listing up to the closing fence. The shared iterator is
      // advanced here, so neither the listing lines nor the closing fence are
      // seen by the outer loop or by the fence tracker.
      let mut include_listing = String::new();
      for next_line in lines.by_ref() {
        if next_line.trim_start().starts_with("```") {
          break;
        }
        include_listing.push_str(next_line);
        include_listing.push('\n');
      }

      let included = if directive.include_type.as_deref() == Some("options") {
        read_options_includes(
          &include_listing,
          base_dir,
          &mut all_included_files,
        )
      } else {
        // `depth` is passed unchanged; the callee increments it when it
        // recurses into each included file.
        read_includes(
          &include_listing,
          base_dir,
          directive.custom_output,
          directive.auto_id_prefix,
          &mut all_included_files,
          depth,
        )?
      };
      output.push_str(&included);
      continue;
    }

    // Update fence tracking state
    fence_tracker = fence_tracker.process_line(line);

    output.push_str(line);
    output.push('\n');
  }

  Ok((output, all_included_files))
}
521
/// Process role markup in markdown content.
///
/// Role markup has the form ``{role}`content` ``, for example
/// ``{command}`ls -la` ``. Markup inside inline code or fenced code blocks is
/// left untouched, as is anything that does not parse as a complete role.
///
/// # Arguments
///
/// * `content` - The markdown content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
/// * `auto_link_options` - Whether to convert {option} roles to links
/// * `valid_options` - Optional set of valid option names for validation
///
/// # Returns
///
/// The processed markdown with role markup converted to HTML
#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap/HashSet sufficient for this use case"
)]
pub fn process_role_markup(
  content: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
  auto_link_options: bool,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  let mut result = String::with_capacity(content.len());
  let mut chars = content.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();

  while let Some(ch) = chars.next() {
    // Handle backticks (code fences and inline code)
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;

      // Add all the backticks
      result.push_str(&"`".repeat(tick_count));
      continue;
    }

    // Handle tilde code fences (~~~)
    if ch == '~' && chars.peek() == Some(&'~') {
      let (new_tracker, tilde_count) = tracker.process_tildes(&mut chars);
      tracker = new_tracker;

      result.push_str(&"~".repeat(tilde_count));
      continue;
    }

    // Handle newlines
    if ch == '\n' {
      tracker = tracker.process_newline();
      result.push(ch);
      continue;
    }

    // Process role markup only if we're not in any kind of code
    if ch == '{' && !tracker.in_any_code() {
      // Parse on a clone of the iterator and adopt the clone only on
      // success. This keeps the position exact for multi-byte characters
      // (counting consumed bytes and skipping that many chars would overshoot
      // after non-ASCII role content) and avoids copying the remaining input
      // for every `{`.
      let mut lookahead = chars.clone();
      if let Some(role_markup) = parse_role_markup(
        &mut lookahead,
        manpage_urls,
        auto_link_options,
        valid_options,
      ) {
        chars = lookahead;
        result.push_str(&role_markup);
      } else {
        // Not a valid role markup, keep the original character
        result.push(ch);
      }
    } else {
      result.push(ch);
    }
  }

  result
}
610
611/// Parse a role markup from the character iterator.
612///
613/// # Returns
614///
615/// `Some(html)` if a valid role markup is found, `None` otherwise.
616fn parse_role_markup(
617  chars: &mut std::iter::Peekable<std::str::Chars>,
618  manpage_urls: Option<&std::collections::HashMap<String, String>>,
619  auto_link_options: bool,
620  valid_options: Option<&std::collections::HashSet<String>>,
621) -> Option<String> {
622  let mut role_name = String::new();
623
624  // Parse role name (lowercase letters only)
625  while let Some(&ch) = chars.peek() {
626    if ch.is_ascii_lowercase() {
627      role_name.push(ch);
628      chars.next();
629    } else {
630      break;
631    }
632  }
633
634  // Must have a non-empty role name
635  if role_name.is_empty() {
636    return None;
637  }
638
639  // Expect closing brace
640  if chars.peek() != Some(&'}') {
641    return None;
642  }
643  chars.next(); // consume '}'
644
645  // Expect opening backtick
646  if chars.peek() != Some(&'`') {
647    return None;
648  }
649  chars.next(); // consume '`'
650
651  // Parse content until closing backtick
652  let mut content = String::new();
653  for ch in chars.by_ref() {
654    if ch == '`' {
655      // Found closing backtick, validate content
656      // Most role types should not have empty content
657      if content.is_empty() && !matches!(role_name.as_str(), "manpage") {
658        return None; // reject empty content for most roles
659      }
660      return Some(format_role_markup(
661        &role_name,
662        &content,
663        manpage_urls,
664        auto_link_options,
665        valid_options,
666      ));
667    }
668    content.push(ch);
669  }
670
671  // No closing backtick found
672  None
673}
674
675/// Format the role markup as HTML based on the role type and content.
676#[must_use]
677#[allow(
678  clippy::option_if_let_else,
679  reason = "Nested options clearer with if-let"
680)]
681#[allow(
682  clippy::implicit_hasher,
683  reason = "Standard HashMap/HashSet sufficient for this use case"
684)]
685pub fn format_role_markup(
686  role_type: &str,
687  content: &str,
688  manpage_urls: Option<&std::collections::HashMap<String, String>>,
689  auto_link_options: bool,
690  valid_options: Option<&std::collections::HashSet<String>>,
691) -> String {
692  let escaped_content = encode_text(content);
693  match role_type {
694    "manpage" => {
695      if let Some(urls) = manpage_urls {
696        if let Some(url) = urls.get(content) {
697          format!(
698            "<a href=\"{url}\" \
699             class=\"manpage-reference\">{escaped_content}</a>"
700          )
701        } else {
702          format!("<span class=\"manpage-reference\">{escaped_content}</span>")
703        }
704      } else {
705        format!("<span class=\"manpage-reference\">{escaped_content}</span>")
706      }
707    },
708    "command" => format!("<code class=\"command\">{escaped_content}</code>"),
709    "env" => format!("<code class=\"env-var\">{escaped_content}</code>"),
710    "file" => format!("<code class=\"file-path\">{escaped_content}</code>"),
711    "option" => {
712      if cfg!(feature = "ndg-flavored") && auto_link_options {
713        // Check if validation is enabled and option is valid
714        let should_link =
715          valid_options.is_none_or(|opts| opts.contains(content)); // If no validation set, link all options
716
717        if should_link {
718          let option_id = sanitize_option_id(content);
719          format!(
720            "<a class=\"option-reference\" \
721             href=\"options.html#{option_id}\"><code \
722             class=\"nixos-option\">{escaped_content}</code></a>"
723          )
724        } else {
725          format!("<code class=\"nixos-option\">{escaped_content}</code>")
726        }
727      } else {
728        format!("<code class=\"nixos-option\">{escaped_content}</code>")
729      }
730    },
731    "var" => format!("<code class=\"nix-var\">{escaped_content}</code>"),
732    _ => format!("<span class=\"{role_type}-markup\">{escaped_content}</span>"),
733  }
734}
735
736/// Process MyST-style autolinks in markdown content.
737///
738/// Converts MyST-like autolinks supported by Nixpkgs-flavored commonmark:
739/// - `[](#anchor)` -> `[](#anchor) -> {{ANCHOR}}` (placeholder for comrak)
740/// - `[](https://url)` -> `<https://url>` (converted to standard autolink)
741///
742/// # Arguments
743///
744/// * `content` - The markdown content to process
745///
746/// # Returns
747///
748/// The processed markdown with `MyST` autolinks converted as a [`String`]
749#[must_use]
750pub fn process_myst_autolinks(content: &str) -> String {
751  let mut result = String::with_capacity(content.len());
752  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
753
754  for line in content.lines() {
755    // Update fence tracking state
756    fence_tracker = fence_tracker.process_line(line);
757
758    // Only process MyST autolinks if we're not in a code block
759    if fence_tracker.in_code_block() {
760      result.push_str(line);
761    } else {
762      result.push_str(&process_line_myst_autolinks(line));
763    }
764    result.push('\n');
765  }
766
767  result
768}
769
770/// Process `MyST` autolinks in a single line.
771fn process_line_myst_autolinks(line: &str) -> String {
772  let mut result = String::with_capacity(line.len());
773  let mut chars = line.chars().peekable();
774  let mut tracker = crate::utils::codeblock::InlineTracker::new();
775
776  while let Some(ch) = chars.next() {
777    if ch == '`' {
778      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
779      tracker = new_tracker;
780      result.push_str(&"`".repeat(tick_count));
781      continue;
782    }
783
784    if ch == '[' && chars.peek() == Some(&']') && !tracker.in_any_code() {
785      chars.next(); // consume ']'
786
787      // Check if this is []{#...} syntax (inline anchor, not autolink)
788      // Nice pit, would be a shame if someone was to... fall into it.
789      if chars.peek() == Some(&'{') {
790        // This is inline anchor syntax, not autolink, keep as-is
791        result.push_str("[]");
792        continue;
793      }
794
795      if chars.peek() == Some(&'(') {
796        chars.next(); // consume '('
797
798        // Collect URL until ')'
799        let mut url = String::new();
800        let mut found_closing = false;
801        while let Some(&next_ch) = chars.peek() {
802          if next_ch == ')' {
803            chars.next(); // consume ')'
804            found_closing = true;
805            break;
806          }
807          url.push(next_ch);
808          chars.next();
809        }
810
811        if found_closing && !url.is_empty() {
812          // Check if it's an anchor link (starts with #) or a URL
813          if url.starts_with('#') {
814            // Add placeholder text for comrak to parse it as a link
815            let _ = write!(result, "[{{{{ANCHOR}}}}]({url})");
816          } else if url.starts_with("http://") || url.starts_with("https://") {
817            // Convert URL autolinks to standard <url> format
818            let _ = write!(result, "<{url}>");
819          } else {
820            // Keep other patterns as-is
821            let _ = write!(result, "[]({url})");
822          }
823        } else {
824          // Malformed, put back what we consumed
825          result.push_str("](");
826          result.push_str(&url);
827        }
828      } else {
829        // Not a link, put back consumed character
830        result.push(']');
831      }
832    } else {
833      result.push(ch);
834    }
835  }
836
837  result
838}
839
/// Process inline anchors in markdown content.
///
/// This function processes inline anchor syntax like `[]{#my-anchor}` while
/// being code-block aware to avoid processing inside code fences. A list item
/// that starts with an anchor (`- []{#id} text`, `1. []{#id} text`) is
/// handled by the list-item path at any indentation; all other lines go
/// through the regular per-line anchor processing. Every output line is
/// terminated with `\n`.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed markdown with inline anchors converted to HTML spans
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_inline_anchors(content: &str) -> String {
  let mut result = String::with_capacity(content.len() + 100);
  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();

  for line in content.lines() {
    let trimmed = line.trim_start();
    // The list marker is searched in `trimmed`, so the offset it yields must
    // be shifted by the indentation before it is applied to `line`.
    let indent_len = line.len() - trimmed.len();

    // Update fence tracking state
    fence_tracker = fence_tracker.process_line(line);

    if fence_tracker.in_code_block() {
      // In code block, keep line as-is
      result.push_str(line);
    } else if let Some(processed_line) = find_list_item_anchor(trimmed)
      .and_then(|offset| process_list_item_anchor(line, indent_len + offset))
    {
      // List item with a leading anchor:
      // "- []{#id} content" or "1. []{#id} content"
      result.push_str(&processed_line);
    } else {
      // Process regular inline anchors in the line
      result.push_str(&process_line_anchors(line));
    }
    result.push('\n');
  }

  result
}
888
/// Convert Pandoc/CommonMark bracketed spans (`[text]{#id .class key=value}`)
/// to HTML `<span>` elements.
///
/// Lines inside fenced code blocks are copied unchanged. Every output line is
/// terminated with `\n`.
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_bracketed_spans(content: &str) -> String {
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut out = String::with_capacity(content.len());

  for line in content.lines() {
    fences = fences.process_line(line);

    if fences.in_code_block() {
      out.push_str(line);
      out.push('\n');
      continue;
    }

    let converted = process_line_bracketed_spans(line);
    out.push_str(&converted);
    out.push('\n');
  }

  out
}
908
/// Convert the bracketed spans of a single line to HTML.
///
/// A `[` starts a candidate span unless it directly follows `!` (image
/// syntax) or sits inside inline code. Candidates that do not parse as
/// `[text]{attrs}` are emitted unchanged.
#[cfg(feature = "nixpkgs")]
fn process_line_bracketed_spans(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();
  let mut previous = None;

  while let Some(ch) = chars.next() {
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;
      result.push_str(&"`".repeat(tick_count));
      previous = Some('`');
      continue;
    }

    if ch == '[' && previous != Some('!') && !tracker.in_any_code() {
      let remaining: String = chars.clone().collect();
      if let Some((html, consumed_bytes)) = parse_bracketed_span(&remaining) {
        // The parser reports a byte length, but the iterator advances one
        // char at a time. Translate the length first so that multi-byte text
        // inside the span does not swallow the characters after it.
        let consumed_chars = remaining
          .char_indices()
          .take_while(|(offset, _)| *offset < consumed_bytes)
          .count();
        for _ in 0..consumed_chars {
          chars.next();
        }
        result.push_str(&html);
        // The span ends in `</span>`, so the previous character is `>`.
        previous = Some('>');
        continue;
      }
    }

    result.push(ch);
    previous = Some(ch);
  }

  result
}
943
/// Parse a bracketed span from `input`, which starts just after the opening
/// `[`.
///
/// Expects non-empty text up to the first `]`, immediately followed by a
/// `{...}` attribute block that yields at least one attribute.
///
/// Returns the rendered `<span>` together with the number of bytes of `input`
/// that the span occupies (through the closing `}`), or `None` when `input`
/// does not start with a valid span.
#[cfg(feature = "nixpkgs")]
fn parse_bracketed_span(input: &str) -> Option<(String, usize)> {
  let (text, after_text) = input.split_once(']')?;
  if text.is_empty() {
    return None;
  }

  let attr_block = after_text.strip_prefix('{')?;
  let (attrs, _) = attr_block.split_once('}')?;
  let html_attrs = render_span_attrs(attrs)?;

  // text + "]{" + attrs + "}"
  let consumed = text.len() + "]{".len() + attrs.len() + "}".len();
  let html = format!("<span{html_attrs}>{}</span>", encode_text(text));
  Some((html, consumed))
}
961
962#[cfg(feature = "nixpkgs")]
963fn render_span_attrs(attrs: &str) -> Option<String> {
964  let mut id = None;
965  let mut classes = Vec::new();
966  let mut pairs = Vec::new();
967
968  for attr in attrs.split_whitespace() {
969    if let Some(value) = attr.strip_prefix('#') {
970      if !value.is_empty() {
971        id = Some(value);
972      }
973    } else if let Some(value) = attr.strip_prefix('.') {
974      if !value.is_empty() {
975        classes.push(value);
976      }
977    } else if let Some((key, value)) = attr.split_once('=')
978      && key
979        .chars()
980        .all(|ch| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_')
981    {
982      pairs.push((key, value.trim_matches('"')));
983    }
984  }
985
986  if id.is_none() && classes.is_empty() && pairs.is_empty() {
987    return None;
988  }
989
990  let mut rendered = String::new();
991  if let Some(id) = id {
992    let _ = write!(rendered, " id=\"{}\"", encode_double_quoted_attribute(id));
993  }
994  if !classes.is_empty() {
995    let _ = write!(
996      rendered,
997      " class=\"{}\"",
998      encode_double_quoted_attribute(&classes.join(" "))
999    );
1000  }
1001  for (key, value) in pairs {
1002    let _ = write!(
1003      rendered,
1004      " {key}=\"{}\"",
1005      encode_double_quoted_attribute(value)
1006    );
1007  }
1008
1009  Some(rendered)
1010}
1011
/// Detect a list item whose content starts with an inline anchor.
///
/// `trimmed` is expected to have no leading whitespace. Recognised markers
/// are `- `, `* `, `+ ` and an ordered marker made of ASCII digits followed
/// by `. `. Returns the byte offset within `trimmed` at which the `[]{#`
/// anchor starts, or `None` when the line has no such shape.
fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
  const ANCHOR_OPEN: &str = "[]{#";

  // Unordered list: "- []{#id}", "* []{#id}" or "+ []{#id}".
  let is_bullet_anchor = ["- ", "* ", "+ "].iter().any(|marker| {
    trimmed
      .strip_prefix(*marker)
      .is_some_and(|rest| rest.starts_with(ANCHOR_OPEN))
  });
  if is_bullet_anchor {
    return Some(2);
  }

  // Ordered list: "1. []{#id}" or "123. []{#id}". ASCII digits are one byte
  // each, so the digit count is also a valid byte offset.
  let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
  if digits == 0 {
    return None;
  }
  trimmed[digits..]
    .strip_prefix(". ")
    .filter(|rest| rest.starts_with(ANCHOR_OPEN))
    .map(|_| digits + 2)
}
1043
/// Replace the `[]{#id}` anchor at `anchor_start` with an anchor span.
///
/// The text before `anchor_start` (the list marker) and the text after the
/// closing `}` are kept verbatim around the generated
/// `<span id="…" class="nixos-anchor"></span>`.
///
/// # Arguments
///
/// * `line` - The list item line
/// * `anchor_start` - Byte offset in `line` at which `[]{#` is expected
///
/// # Returns
///
/// The rewritten line, or `None` when `anchor_start` is not a valid offset
/// into `line`, no complete `[]{#id}` anchor starts there, or the ID is empty
/// or contains characters other than ASCII alphanumerics, `-` and `_`.
fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
  // `get` yields `None` instead of panicking when the offset lies past the
  // end of the line or inside a multi-byte character.
  let before_anchor = line.get(..anchor_start)?;
  let after_open = line.get(anchor_start..)?.strip_prefix("[]{#")?;

  // Everything up to the first '}' is the ID; the rest is kept as-is.
  let (id, remaining_content) = after_open.split_once('}')?;

  let id_is_valid = !id.is_empty()
    && id
      .chars()
      .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');

  id_is_valid.then(|| {
    format!(
      "{before_anchor}<span id=\"{id}\" \
       class=\"nixos-anchor\"></span>{remaining_content}"
    )
  })
}
1073
1074/// Process inline anchors in a single line.
1075fn process_line_anchors(line: &str) -> String {
1076  let mut result = String::with_capacity(line.len());
1077  let mut chars = line.chars().peekable();
1078  let mut tracker = crate::utils::codeblock::InlineTracker::new();
1079
1080  while let Some(ch) = chars.next() {
1081    if ch == '`' {
1082      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
1083      tracker = new_tracker;
1084      result.push_str(&"`".repeat(tick_count));
1085      continue;
1086    }
1087
1088    if ch == '[' && chars.peek() == Some(&']') && !tracker.in_any_code() {
1089      chars.next(); // consume ']'
1090
1091      // Check for {#id} pattern
1092      if chars.peek() == Some(&'{') {
1093        chars.next(); // consume '{'
1094        if chars.peek() == Some(&'#') {
1095          chars.next(); // consume '#'
1096
1097          // Collect the ID
1098          let mut id = String::new();
1099          while let Some(&next_ch) = chars.peek() {
1100            if next_ch == '}' {
1101              chars.next(); // consume '}'
1102
1103              // Validate ID and create span
1104              if !id.is_empty()
1105                && id
1106                  .chars()
1107                  .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
1108              {
1109                let _ = write!(
1110                  result,
1111                  "<span id=\"{id}\" class=\"nixos-anchor\"></span>"
1112                );
1113              } else {
1114                // Invalid ID, put back original text
1115                let _ = write!(result, "[]{{{{#{id}}}}}");
1116              }
1117              break;
1118            } else if next_ch.is_ascii_alphanumeric()
1119              || next_ch == '-'
1120              || next_ch == '_'
1121            {
1122              id.push(next_ch);
1123              chars.next();
1124            } else {
1125              // Invalid character, put back original text
1126              let _ = write!(result, "[]{{{{#{id}");
1127              break;
1128            }
1129          }
1130        } else {
1131          // Not an anchor, put back consumed characters
1132          result.push_str("]{");
1133        }
1134      } else {
1135        // Not an anchor, put back consumed character
1136        result.push(']');
1137      }
1138    } else {
1139      result.push(ch);
1140    }
1141  }
1142
1143  result
1144}
1145
/// Convert block-level extensions in markdown content to HTML.
///
/// Lines are scanned in order. Outside of fenced code blocks, three kinds of
/// block openers are recognised and replaced by rendered HTML:
///
/// - GitHub-style callouts (`> [!TYPE]`)
/// - fenced admonitions (`::: {.type #id}`), whose body is processed
///   recursively and whose output is re-indented to the opener's indentation
/// - figures (`::: {.figure #id}`)
///
/// Include-boundary marker lines are dropped; every other line is kept
/// unchanged.
///
/// NOTE(review): the admonition parser accepts any class as a fallback type,
/// so an opener like `::: {.figure #id}` with a closing brace is matched by
/// the admonition branch before the figure branch is tried. Confirm whether
/// that is intended.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed lines joined with `\n`
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_block_elements(content: &str) -> String {
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut pending = content.lines().peekable();
  let mut output: Vec<String> = Vec::new();

  while let Some(line) = pending.next() {
    if line.trim() == INCLUDE_BOUNDARY_MARKER {
      continue;
    }

    // The fence state is updated for every line that reaches this point;
    // lines inside a code block are passed through verbatim.
    fences = fences.process_line(line);
    if fences.in_code_block() {
      output.push(line.to_string());
      continue;
    }

    // GitHub-style callout: > [!TYPE]
    if let Some((callout_type, first_line)) = parse_github_callout(line) {
      let body = collect_github_callout_content(&mut pending, &first_line);
      output.push(render_admonition(&callout_type, None, &body));
      continue;
    }

    // Fenced admonition: ::: {.type}
    if let Some(AdmonitionStart {
      adm_type,
      id,
      fence_len,
    }) = parse_fenced_admonition_start(line)
    {
      let indent = leading_whitespace(line);
      let (raw_body, trailing) =
        collect_fenced_content(&mut pending, indent, fence_len);
      let body = process_block_elements(&raw_body);
      let html = render_admonition(&adm_type, id.as_deref(), &body);
      output.push(indent_block(&html, indent));
      // Text following the closing fence on the same line becomes its own
      // output line.
      output.extend(trailing);
      continue;
    }

    // Figure: ::: {.figure #id}; anything else is a regular line.
    match parse_figure_block(line, &mut pending) {
      Some((id, title, body)) => {
        output.push(render_figure(id.as_deref(), &title, &body));
      },
      None => output.push(line.to_string()),
    }
  }

  output.join("\n")
}
1226
/// Recognise the opening line of a GitHub-style callout: `> [!TYPE] content`.
///
/// Leading whitespace is ignored. The type must be one of `NOTE`, `TIP`,
/// `IMPORTANT`, `WARNING`, `CAUTION` or `DANGER`, written in upper case.
///
/// # Returns
///
/// `(type, content)` where `type` is lower-cased and `content` is the trimmed
/// text after the closing bracket, or `None` when the line is not a callout
/// opener.
fn parse_github_callout(line: &str) -> Option<(String, String)> {
  let after_open = line.trim_start().strip_prefix("> [!")?;
  let (callout_type, rest) = after_open.split_once(']')?;

  let is_known_type = matches!(
    callout_type,
    "NOTE" | "TIP" | "IMPORTANT" | "WARNING" | "CAUTION" | "DANGER"
  );

  is_known_type
    .then(|| (callout_type.to_lowercase(), rest.trim().to_string()))
}
1252
/// Check if a line starts with a valid ATX header (1-6 '#' followed by
/// whitespace or EOL).
///
/// Per `CommonMark` spec, an ATX header requires 1-6 '#' characters followed by
/// either:
///
/// - A whitespace character (space, tab, etc.)
/// - End of line (the string ends)
///
/// Leading whitespace is not skipped; callers are expected to pass an already
/// trimmed line.
///
/// # Arguments
///
/// * `line` - The line to check
///
/// # Returns
///
/// `true` if the line starts with a valid ATX header marker
fn is_atx_header(line: &str) -> bool {
  // '#' is a single byte, so the count doubles as a byte offset.
  let hash_count = line.bytes().take_while(|&byte| byte == b'#').count();
  if !(1..=6).contains(&hash_count) {
    return false;
  }

  // The marker must be followed by whitespace or nothing at all. A
  // non-whitespace character disqualifies the line even when it is the last
  // character (e.g. "#a").
  match line[hash_count..].chars().next() {
    None => true,
    Some(next) => next.is_whitespace(),
  }
}
1290
1291/// Collect content for GitHub-style callouts
1292fn collect_github_callout_content(
1293  lines: &mut std::iter::Peekable<std::str::Lines>,
1294  initial_content: &str,
1295) -> String {
1296  let mut content = String::new();
1297
1298  if !initial_content.is_empty() {
1299    content.push_str(initial_content);
1300    content.push('\n');
1301  }
1302
1303  while let Some(line) = lines.peek() {
1304    let trimmed = line.trim_start();
1305
1306    // Empty line ends the blockquote
1307    if trimmed.is_empty() {
1308      break;
1309    }
1310
1311    // Check if this is a continuation line with `>`
1312    let content_part = if trimmed.starts_with('>') {
1313      trimmed.strip_prefix('>').unwrap_or("").trim_start()
1314    } else {
1315      // Check if this line starts a new block element that cannot be
1316      // lazy-continued ATX headers, setext header underlines, code
1317      // fences, and thematic breaks
1318      let starts_new_block = is_atx_header(trimmed)
1319        || trimmed.starts_with("```")
1320        || trimmed.starts_with("~~~")
1321        || (trimmed.starts_with("---")
1322          && trimmed.chars().all(|c| c == '-' || c.is_whitespace()))
1323        || (trimmed.starts_with("===")
1324          && trimmed.chars().all(|c| c == '=' || c.is_whitespace()))
1325        || (trimmed.starts_with("***")
1326          && trimmed.chars().all(|c| c == '*' || c.is_whitespace()));
1327
1328      if starts_new_block {
1329        break;
1330      }
1331
1332      // Lazy continuation
1333      // Mind you, "lazy" doesn't refer to me being lazy but the GFM feature for
1334      // a line without `>` that continues the blockquote
1335      // paragraph
1336      trimmed
1337    };
1338
1339    content.push_str(content_part);
1340    content.push('\n');
1341    lines.next(); // consume the line
1342  }
1343
1344  content.trim().to_string()
1345}
1346
/// Parsed opening line of a fenced admonition such as `::: {.type #id}`.
struct AdmonitionStart {
  /// Lower-cased class chosen as the admonition type.
  adm_type:  String,
  /// Value of the first non-empty `#id` token, if any.
  id:        Option<String>,
  /// Number of `:` characters in the opening fence (at least 3).
  fence_len: usize,
}

/// Parse fenced admonition start: `::: {.type #id}`.
///
/// The line must consist of at least three colons followed by a brace-enclosed
/// attribute list. Inside the braces, whitespace-separated `.class` and `#id`
/// tokens may appear in any order; empty tokens (`.` or `#` alone) and other
/// tokens are ignored.
///
/// The admonition type is the first class that is one of `note`, `tip`,
/// `important`, `warning`, `caution` or `danger` (compared case-insensitively);
/// if none is present, the first class is used instead.
///
/// # Returns
///
/// The parsed opener, or `None` when the line is not a fenced opener or
/// carries no usable class.
fn parse_fenced_admonition_start(line: &str) -> Option<AdmonitionStart> {
  let trimmed = line.trim();
  let after_fence = trimmed.trim_start_matches(':');
  let fence_len = trimmed.len() - after_fence.len();
  if fence_len < 3 {
    return None;
  }

  // Only the text between '{' and the first '}' is considered.
  let (attrs, _) = after_fence
    .trim_start()
    .strip_prefix('{')?
    .split_once('}')?;

  // Attribute lists allow either order: `{.warning #id}` and `{#id .warning}`
  // are equivalent.
  let mut known_type: Option<String> = None;
  let mut fallback_type: Option<String> = None;
  let mut id: Option<String> = None;
  for part in attrs.split_whitespace() {
    if let Some(class) = part.strip_prefix('.') {
      // A bare `.` carries no class name.
      if class.is_empty() {
        continue;
      }
      let class = class.to_ascii_lowercase();
      let is_known = matches!(
        class.as_str(),
        "note" | "tip" | "important" | "warning" | "caution" | "danger"
      );
      // The fallback is only consulted when no known type exists, in which
      // case the first unknown class is also the first class overall.
      let slot = if is_known {
        &mut known_type
      } else {
        &mut fallback_type
      };
      if slot.is_none() {
        *slot = Some(class);
      }
    } else if let Some(value) = part.strip_prefix('#') {
      // A bare `#` would otherwise end up as an empty `id` attribute.
      if id.is_none() && !value.is_empty() {
        id = Some(value.to_string());
      }
    }
  }

  let adm_type = known_type.or(fallback_type)?;
  Some(AdmonitionStart {
    adm_type,
    id,
    fence_len,
  })
}
1405
/// Return the run of whitespace characters at the start of `line`.
///
/// The result is the whole line when it contains nothing but whitespace and
/// an empty string when the line starts with a non-whitespace character.
fn leading_whitespace(line: &str) -> &str {
  // Whatever `trim_start` removes is exactly the leading whitespace.
  let indent_len = line.len() - line.trim_start().len();
  &line[..indent_len]
}
1413
/// Remove `indent` from the start of `line` when the line begins with it.
///
/// Lines that do not start with the exact `indent` string are returned
/// unchanged.
fn strip_indent<'a>(line: &'a str, indent: &str) -> &'a str {
  match line.strip_prefix(indent) {
    Some(dedented) => dedented,
    None => line,
  }
}
1417
/// Prefix every line of `block` with `indent`.
///
/// With an empty `indent` the block is returned unchanged. Otherwise the
/// block is split into lines, each line (including empty ones) receives the
/// prefix, and the lines are joined with `\n`; a trailing line terminator in
/// `block` is therefore not preserved.
fn indent_block(block: &str, indent: &str) -> String {
  if indent.is_empty() {
    return block.to_string();
  }

  let mut indented = String::with_capacity(block.len() + indent.len());
  for (index, line) in block.lines().enumerate() {
    if index > 0 {
      indented.push('\n');
    }
    indented.push_str(indent);
    indented.push_str(line);
  }
  indented
}
1429
1430/// Collect content until closing :::
1431///
1432/// # Returns
1433///
1434/// Tuple of (`admonition_content`, `trailing_content`). If there's content
1435/// after the closing `:::` on the same line, it's returned as
1436/// `trailing_content`.
1437fn collect_fenced_content(
1438  lines: &mut std::iter::Peekable<std::str::Lines>,
1439  indent: &str,
1440  fence_len: usize,
1441) -> (String, Option<String>) {
1442  let mut content = String::new();
1443
1444  for line in lines.by_ref() {
1445    let line = strip_indent(line, indent);
1446    let trimmed = line.trim();
1447    if trimmed == INCLUDE_BOUNDARY_MARKER {
1448      return (content.trim().to_string(), None);
1449    }
1450
1451    let closing_len = trimmed.chars().take_while(|&ch| ch == ':').count();
1452    if closing_len >= fence_len {
1453      // check if there's content after the closing :::
1454      let after_colons = &trimmed[closing_len..];
1455      if !after_colons.is_empty() {
1456        // there's trailing content on the same line as the closing delimiter
1457        return (content.trim().to_string(), Some(after_colons.to_string()));
1458      }
1459      break;
1460    }
1461    content.push_str(line);
1462    content.push('\n');
1463  }
1464
1465  (content.trim().to_string(), None)
1466}
1467
1468/// Parse figure block: ::: {.figure #id}
1469#[allow(
1470  clippy::option_if_let_else,
1471  reason = "Nested options clearer with if-let"
1472)]
1473fn parse_figure_block(
1474  line: &str,
1475  lines: &mut std::iter::Peekable<std::str::Lines>,
1476) -> Option<(Option<String>, String, String)> {
1477  let trimmed = line.trim();
1478  if !trimmed.starts_with(":::") {
1479    return None;
1480  }
1481
1482  let after_colons = trimmed[3..].trim_start();
1483  if !after_colons.starts_with("{.figure") {
1484    return None;
1485  }
1486
1487  // Extract ID if present
1488  let id = if let Some(hash_pos) = after_colons.find('#') {
1489    if let Some(close_brace) = after_colons.find('}') {
1490      if hash_pos < close_brace {
1491        Some(after_colons[hash_pos + 1..close_brace].trim().to_string())
1492      } else {
1493        None
1494      }
1495    } else {
1496      None
1497    }
1498  } else {
1499    None
1500  };
1501
1502  // Get title from next line (should start with #)
1503  let title = if let Some(title_line) = lines.next() {
1504    let trimmed_title = title_line.trim();
1505    if let Some(this) = trimmed_title.strip_prefix('#') {
1506      { this.trim_matches(char::is_whitespace) }.to_string()
1507    } else {
1508      // Put the line back if it's not a title
1509      return None;
1510    }
1511  } else {
1512    return None;
1513  };
1514
1515  // Collect figure content
1516  let mut content = String::new();
1517  for line in lines.by_ref() {
1518    let trimmed = line.trim();
1519    if trimmed == INCLUDE_BOUNDARY_MARKER || trimmed.starts_with(":::") {
1520      break;
1521    }
1522    content.push_str(line);
1523    content.push('\n');
1524  }
1525
1526  Some((id, title, content.trim().to_string()))
1527}
1528
1529/// Render an admonition as HTML
1530fn render_admonition(
1531  adm_type: &str,
1532  id: Option<&str>,
1533  content: &str,
1534) -> String {
1535  let capitalized_type = crate::utils::capitalize_first(adm_type);
1536  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
1537
1538  let opening = format!(
1539    "<div class=\"admonition {adm_type}\"{id_attr}>\n<p \
1540     class=\"admonition-title\">{capitalized_type}</p>"
1541  );
1542  format!("{opening}\n\n{content}\n\n</div>\n")
1543}
1544
1545/// Render a figure as HTML
1546fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
1547  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
1548
1549  format!(
1550    "<figure{id_attr}>\n<figcaption>{title}</figcaption>\n{content}\n</figure>"
1551  )
1552}
1553
/// Turn manpage reference spans into links.
///
/// The HTML is parsed, and every `span.manpage-reference` element whose text
/// content is a key of `manpage_urls` is replaced by an
/// `<a class="manpage-reference">` element that links to the mapped URL and
/// carries the same text. Spans without a matching entry stay as they are, as
/// does every span when `manpage_urls` is `None`.
///
/// # Arguments
///
/// * `html` - The HTML content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs (a value may
///   also be an `<a href="…">` snippet, from which the URL is extracted)
///
/// # Returns
///
/// The serialized document after the replacements
#[cfg(feature = "nixpkgs")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap sufficient for this use case"
)]
pub fn process_manpage_references(
  html: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
) -> String {
  process_safe(
    html,
    |html| {
      use kuchikikiki::NodeRef;
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);

      // Replacements are gathered first and applied once the selection loop
      // has finished.
      let mut replacements = Vec::new();
      for span_el in safe_select(&document, "span.manpage-reference") {
        let label = span_el.text_contents();

        // Only spans with a direct URL match are converted.
        let Some(url) = manpage_urls.and_then(|urls| urls.get(&label)) else {
          continue;
        };

        let href = extract_url_from_html(url);
        let attributes = vec![
          (
            kuchikikiki::ExpandedName::new("", "href"),
            kuchikikiki::Attribute {
              prefix: None,
              value:  href.into(),
            },
          ),
          (
            kuchikikiki::ExpandedName::new("", "class"),
            kuchikikiki::Attribute {
              prefix: None,
              value:  "manpage-reference".into(),
            },
          ),
        ];
        let anchor = NodeRef::new_element(
          markup5ever::QualName::new(
            None,
            markup5ever::ns!(html),
            markup5ever::local_name!("a"),
          ),
          attributes,
        );
        anchor.append(NodeRef::new_text(label));
        replacements.push((span_el.clone(), anchor));
      }

      // Put each link in front of its span, then remove the span.
      for (span, anchor) in replacements {
        span.insert_before(anchor);
        span.detach();
      }

      let mut serialized = Vec::new();
      let _ = document.serialize(&mut serialized);
      String::from_utf8(serialized).unwrap_or_else(|_| html.to_string())
    },
    // Fallback argument for `process_safe`.
    // NOTE(review): the previous comment said "Return original HTML on
    // error"; confirm how `process_safe` treats the empty string.
    "",
  )
}
1637
/// Link `{option}` role markup to the options page.
///
/// The HTML is parsed, and every `<code class="nixos-option">` element is
/// replaced by `<a class="option-reference" href="options.html#ID">` wrapping
/// a fresh `<code>` element with the same text, where `ID` is the sanitized
/// option ID of that text. An element is left untouched when:
///
/// - one of its ancestors is an `<a>` whose class contains
///   `option-reference`, or
/// - `valid_options` is given and does not contain the element's text.
///
/// # Arguments
///
/// * `html` - The HTML string to process.
/// * `valid_options` - Optional set of valid option names; when `None`, every
///   option is linked.
///
/// # Returns
///
/// The serialized document with option references rewritten as links.
#[cfg(feature = "ndg-flavored")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashSet sufficient for this use case"
)]
pub fn process_option_references(
  html: &str,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  use kuchikikiki::{Attribute, ExpandedName, NodeRef};
  use markup5ever::{QualName, local_name, ns};
  use tendril::TendrilSink;

  process_safe(
    html,
    |html| {
      let document = kuchikikiki::parse_html().one(html);
      let mut replacements = vec![];

      // Only code elements carrying the nixos-option class (from the
      // {option} role syntax) are considered.
      for code_el in safe_select(&document, "code.nixos-option") {
        let code_text = code_el.text_contents();

        // Walk up the tree looking for an enclosing option-reference link.
        let mut already_linked = false;
        let mut ancestor = code_el.parent();
        while let Some(node) = ancestor {
          let is_option_link = node.as_element().is_some_and(|element| {
            element.name.local == local_name!("a")
              && element
                .attributes
                .borrow()
                .get(local_name!("class"))
                .is_some_and(|class_attr| {
                  class_attr.contains("option-reference")
                })
          });
          if is_option_link {
            already_linked = true;
            break;
          }
          ancestor = node.parent();
        }
        if already_linked {
          continue;
        }

        // With a validation set, unknown options stay plain code elements.
        let is_unknown_option = valid_options
          .is_some_and(|opts| !opts.contains(code_text.as_str()));
        if is_unknown_option {
          continue;
        }

        let option_id = sanitize_option_id(code_text.as_str());
        let link = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("a")),
          vec![
            (ExpandedName::new("", "href"), Attribute {
              prefix: None,
              value:  format!("options.html#{option_id}"),
            }),
            (ExpandedName::new("", "class"), Attribute {
              prefix: None,
              value:  "option-reference".into(),
            }),
          ],
        );
        let inner_code = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("code")),
          vec![],
        );
        inner_code.append(NodeRef::new_text(code_text.clone()));
        link.append(inner_code);
        replacements.push((code_el.clone(), link));
      }

      // Put each link in front of its code element, then remove the element.
      for (code_el, link) in replacements {
        code_el.insert_before(link);
        code_el.detach();
      }

      let mut serialized = Vec::new();
      let _ = document.serialize(&mut serialized);
      String::from_utf8(serialized).unwrap_or_else(|_| html.to_string())
    },
    // Fallback argument for `process_safe`.
    // NOTE(review): the previous comment said "Return original HTML on
    // error"; confirm how `process_safe` treats the empty string.
    "",
  )
}
1742
/// Return the URL held by an `<a href="…">` snippet, or the input itself.
///
/// When the input starts with `<a href="`, the text up to the next double
/// quote is returned. Inputs that do not start that way, or that lack the
/// closing quote, are returned unchanged.
fn extract_url_from_html(url_or_html: &str) -> &str {
  url_or_html
    .strip_prefix("<a href=\"")
    .and_then(|after_open| after_open.split_once('"'))
    .map_or(url_or_html, |(href, _)| href)
}
1760
/// Convert wikilinks and Obsidian-style links in markdown content to HTML
/// anchors.
///
/// Converts:
///
/// - `[[page]]` (Obsidian link) -> `<a href="page.html" class="obsidian-link">`
/// - `[[name|url]]` (Wiki link) -> `<a href="url" class="wikilink">`
///
/// Lines for which the fence tracker reports a code block are copied
/// unchanged.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed lines joined with `\n`, with trailing whitespace removed
#[cfg(feature = "wiki")]
#[must_use]
pub fn process_wikilinks(content: &str) -> String {
  use crate::utils::codeblock::FenceTracker;

  let mut fences = FenceTracker::new();
  let mut output = String::with_capacity(content.len());

  for line in content.lines() {
    fences = fences.process_line(line);

    if fences.in_code_block() {
      output.push_str(line);
    } else {
      output.push_str(&process_line_wikilinks(line));
    }
    output.push('\n');
  }

  // Drop the final newline along with any other trailing whitespace.
  output.truncate(output.trim_end().len());
  output
}
1799
/// Process wikilinks in a single line.
///
/// - `[[name|url]]` becomes `<a href="url" class="wikilink">name</a>`; the
///   text is split at the first `|` and both parts are trimmed
/// - `[[page]]` becomes `<a href="page.html" class="obsidian-link">page</a>`
/// - `[[]]` and an unterminated `[[…` are emitted unchanged
///
/// Link text is escaped as HTML text and link targets are escaped as
/// double-quoted attribute values.
///
/// # Arguments
///
/// * `line` - The line to process
///
/// # Returns
///
/// The line with wikilinks replaced by HTML anchors.
#[cfg(feature = "wiki")]
fn process_line_wikilinks(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    // Anything that is not the start of `[[` is copied through.
    if ch != '[' || chars.next_if_eq(&'[').is_none() {
      result.push(ch);
      continue;
    }

    // Gather the text up to the closing `]]`.
    let mut inner = String::new();
    let mut found_double_close = false;
    while let Some(next_ch) = chars.next() {
      if next_ch == ']' && chars.next_if_eq(&']').is_some() {
        found_double_close = true;
        break;
      }
      inner.push(next_ch);
    }

    if !found_double_close {
      // Unterminated link: restore what was consumed.
      result.push_str("[[");
      result.push_str(&inner);
      continue;
    }
    if inner.is_empty() {
      result.push_str("[[]]");
      continue;
    }

    match inner.split_once('|') {
      Some((name, url)) => {
        let escaped_name = encode_text(name.trim());
        // `encode_text` leaves `"` untouched, which would let the URL break
        // out of the attribute; use the attribute encoder for `href`.
        let escaped_url = encode_double_quoted_attribute(url.trim());
        let _ = write!(
          result,
          "<a href=\"{escaped_url}\" class=\"wikilink\">{escaped_name}</a>"
        );
      },
      None => {
        let page = inner.trim();
        let escaped_page = encode_text(page);
        let link_target = format!("{page}.html");
        let escaped_target = encode_double_quoted_attribute(&link_target);
        let _ = write!(
          result,
          "<a href=\"{escaped_target}\" \
           class=\"obsidian-link\">{escaped_page}</a>"
        );
      },
    }
  }

  result
}
1857
#[cfg(test)]
mod tests {
  use super::*;

  /// Assert that every given line is recognised as an ATX header.
  fn assert_all_headers(lines: &[&str]) {
    for line in lines {
      assert!(is_atx_header(line), "expected an ATX header: {line:?}");
    }
  }

  /// Assert that none of the given lines is recognised as an ATX header.
  fn assert_no_headers(lines: &[&str]) {
    for line in lines {
      assert!(!is_atx_header(line), "unexpected ATX header: {line:?}");
    }
  }

  /// Assert that `haystack` contains every needle.
  #[cfg(feature = "wiki")]
  fn assert_contains_all(haystack: &str, needles: &[&str]) {
    for needle in needles {
      assert!(
        haystack.contains(needle),
        "expected {needle:?} in {haystack:?}"
      );
    }
  }

  #[test]
  fn test_is_atx_header_valid_headers() {
    assert_all_headers(&[
      // 1-6 hashes followed by a space
      "# Header",
      "## Header",
      "### Header",
      "#### Header",
      "##### Header",
      "###### Header",
      // tab after the hashes
      "#\tHeader",
      "##\tHeader",
      // hashes only, no content
      "#",
      "##",
      "###",
      "####",
      "#####",
      "######",
      // several spaces after the hashes
      "#  Header with multiple spaces",
      "##   Header",
    ]);
  }

  #[test]
  fn test_is_atx_header_invalid_headers() {
    assert_no_headers(&[
      // more than 6 hashes
      "####### Too many hashes",
      "######## Even more",
      // no space after the hashes
      "#NoSpace",
      "##NoSpace",
      // hash in the middle of the line
      "Not # a header",
      // empty string
      "",
      // no hash at all
      "Regular text",
      // non-whitespace directly after the hashes
      "#hashtag",
      "##hashtag",
      "#123",
      "##abc",
      // special characters directly after the hash
      "#!important",
      "#@mention",
      "#$variable",
    ]);
  }

  #[test]
  fn test_is_atx_header_edge_cases() {
    // Leading whitespace is expected to be trimmed by the caller; untrimmed
    // input must not be accepted.
    assert_no_headers(&[" # Header", "  ## Header"]);

    assert_all_headers(&[
      // only spaces after the hashes
      "#     ",
      "##    ",
      // trailing newline after a valid header
      "# Header\n",
      "## Header\n",
      // mixed whitespace after the hashes
      "# \t  Header",
      "##  \tHeader",
    ]);
  }

  #[test]
  fn test_is_atx_header_blockquote_context() {
    // Strings of the kind collect_github_callout_content passes in after
    // trim_start().
    assert_all_headers(&["# New Section", "## Subsection"]);

    // Non-headers that must not end a blockquote.
    assert_no_headers(&["#tag", "##issue-123", "###no-space"]);

    // Exactly 6 hashes is valid, 7 is not.
    assert_all_headers(&["###### Level 6"]);
    assert_no_headers(&["####### Not valid"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_obsidian_basic() {
    let rendered = process_wikilinks("Check out [[Some Page]] for details.");
    assert_contains_all(&rendered, &[
      "href=\"Some Page.html\"",
      "class=\"obsidian-link\"",
      ">Some Page<",
    ]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_with_url() {
    let rendered = process_wikilinks("See [[Custom Name|https://example.com]]");
    assert_contains_all(&rendered, &[
      "href=\"https://example.com\"",
      "class=\"wikilink\"",
      ">Custom Name<",
    ]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_with_spaces() {
    let rendered = process_wikilinks("[[My Page Name]]");
    assert_contains_all(&rendered, &["href=\"My Page Name.html\""]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_in_code_block() {
    let rendered = process_wikilinks("```\n[[Wiki Link]]\n```\nThen [[Another]]");
    // The link inside the fence stays literal, the one after it is converted.
    assert_contains_all(&rendered, &["[[Wiki Link]]", "href=\"Another.html\""]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_empty() {
    let rendered = process_wikilinks("[[]]");
    assert_contains_all(&rendered, &["[[]]"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_malformed() {
    let rendered = process_wikilinks("[[ incomplete");
    assert_contains_all(&rendered, &["[[ incomplete"]);
  }

  #[cfg(feature = "wiki")]
  #[test]
  fn test_wikilink_html_escaping() {
    let rendered = process_wikilinks("See [[Page With <script>]] for info");
    assert_contains_all(&rendered, &["&lt;script&gt;"]);
    assert!(!rendered.contains(">Page With <script><"));
  }
}