ndg_commonmark/processor/
extensions.rs

1//! Feature-specific Markdown processing extensions.
2use std::{fmt::Write, fs, path::Path};
3
4use html_escape;
5
6use super::process::process_safe;
7
8/// Safely select DOM elements with graceful error handling.
9fn safe_select(
10  document: &kuchikikiki::NodeRef,
11  selector: &str,
12) -> Vec<kuchikikiki::NodeRef> {
13  match document.select(selector) {
14    Ok(selections) => selections.map(|sel| sel.as_node().clone()).collect(),
15    Err(e) => {
16      log::warn!("DOM selector '{selector}' failed: {e:?}");
17      Vec::new()
18    },
19  }
20}
21
/// Hook for GitHub Flavored Markdown (GFM) specific preprocessing.
///
/// Currently an identity transform: the input is returned as an owned copy.
/// Most GFM features are enabled through comrak options; this function exists
/// so that extra handling (custom tables, task lists, ...) has a place to
/// live if it is ever needed.
///
/// # Arguments
/// * `markdown` - The input markdown text
///
/// # Returns
/// An owned copy of `markdown`.
#[cfg(feature = "gfm")]
#[must_use]
pub fn apply_gfm_extensions(markdown: &str) -> String {
  // XXX: Comrak already supports GFM, but if there is any feature in the spec
  // that is not implemented as we'd like for it to be, we can add it here.
  String::from(markdown)
}
41
/// Maximum recursion depth for file includes to prevent infinite recursion.
///
/// `process_file_includes` returns an error as soon as its `depth` argument
/// reaches this value.
const MAX_INCLUDE_DEPTH: usize = 8;
44
/// Decide whether `path` may be used as an include target.
///
/// A path is rejected when it is absolute, contains a backslash, or has any
/// `..` component. `_base_dir` is accepted for interface compatibility but is
/// not consulted.
#[cfg(feature = "nixpkgs")]
fn is_safe_path(path: &str, _base_dir: &Path) -> bool {
  let candidate = Path::new(path);

  // Any `..` component could climb out of the include root.
  let climbs_upward = candidate
    .components()
    .any(|part| matches!(part, std::path::Component::ParentDir));

  !(candidate.is_absolute() || path.contains('\\') || climbs_upward)
}
63
/// Extract the value of an `html:into-file=` directive from an include line.
///
/// The value runs from just after the first `html:into-file=` up to the next
/// space; when no space follows, the rest of the line (trimmed) is used.
/// Returns `None` when the directive is absent.
#[cfg(feature = "nixpkgs")]
fn parse_include_directive(line: &str) -> Option<String> {
  const DIRECTIVE: &str = "html:into-file=";

  let (_, tail) = line.split_once(DIRECTIVE)?;
  let target = tail
    .split_once(' ')
    .map_or_else(|| tail.trim(), |(value, _)| value);

  Some(target.to_string())
}
82
/// Read and process the files listed in an include block.
///
/// Each non-empty line of `listing` is treated as a path relative to
/// `base_dir`. Paths rejected by `is_safe_path` are skipped with a warning.
/// Readable files are processed recursively via `process_file_includes` (at
/// `depth + 1`) and appended to the result, each terminated by a newline.
/// Unreadable files are logged and replaced by an HTML comment in the output.
///
/// Every included file — and every file it included in turn, with its path
/// rewritten relative to `base_dir` — is appended to `included_files`.
///
/// # Errors
///
/// Propagates the error returned by `process_file_includes` for a nested
/// file.
#[cfg(feature = "nixpkgs")]
#[allow(
  clippy::needless_pass_by_value,
  reason = "Owned value needed for cloning in loop"
)]
fn read_includes(
  listing: &str,
  base_dir: &Path,
  custom_output: Option<String>,
  included_files: &mut Vec<crate::types::IncludedFile>,
  depth: usize,
) -> Result<String, String> {
  let mut result = String::new();

  for line in listing.lines() {
    let trimmed = line.trim();
    if trimmed.is_empty() {
      continue;
    }
    if !is_safe_path(trimmed, base_dir) {
      // Make the rejection visible instead of silently dropping the entry.
      log::warn!("Skipping unsafe include path: {trimmed}");
      continue;
    }
    let full_path = base_dir.join(trimmed);
    log::info!("Including file: {}", full_path.display());

    match fs::read_to_string(&full_path) {
      Ok(content) => {
        // Nested includes resolve relative to the including file's directory.
        let file_dir = full_path.parent().unwrap_or(base_dir);
        let (processed_content, nested_includes) =
          process_file_includes(&content, file_dir, depth + 1)?;

        result.push_str(&processed_content);
        if !processed_content.ends_with('\n') {
          result.push('\n');
        }

        included_files.push(crate::types::IncludedFile {
          path:          trimmed.to_string(),
          custom_output: custom_output.clone(),
        });

        // Normalize nested include paths relative to original base_dir
        for nested in nested_includes {
          let nested_full_path = file_dir.join(&nested.path);
          if let Ok(normalized_path) = nested_full_path.strip_prefix(base_dir) {
            included_files.push(crate::types::IncludedFile {
              path:          normalized_path.to_string_lossy().to_string(),
              custom_output: nested.custom_output,
            });
          }
        }
      },
      Err(err) => {
        // Best effort: keep going, but record why the file is missing.
        log::warn!("Could not include file {}: {err}", full_path.display());
        let _ = writeln!(
          result,
          "<!-- ndg: could not include file: {} -->",
          full_path.display()
        );
      },
    }
  }
  Ok(result)
}
144
/// Expand file include blocks in Nixpkgs/NixOS documentation.
///
/// An include block looks like this:
///
/// ````markdown
/// ```{=include=}
/// path/to/file1.md
/// path/to/file2.md
/// ```
/// ````
///
/// The block (opening line through the next line starting with three
/// backticks) is replaced by the processed contents of the listed files.
/// Include markers that appear inside a fenced code block are left alone.
/// Every other line is copied to the output followed by a newline.
///
/// # Arguments
///
/// * `markdown` - The input markdown text
/// * `base_dir` - The base directory for resolving relative file paths
/// * `depth` - Current recursion depth (use 0 for initial call)
///
/// # Returns
///
/// Returns `Ok((processed_markdown, included_files))` where `included_files`
/// is the list of files recorded while expanding the includes.
///
/// # Errors
///
/// Returns `Err(message)` if `depth` has reached [`MAX_INCLUDE_DEPTH`], which
/// likely indicates a circular include cycle.
///
/// # Security
///
/// Only relative paths without ".." are allowed.
#[cfg(feature = "nixpkgs")]
pub fn process_file_includes(
  markdown: &str,
  base_dir: &std::path::Path,
  depth: usize,
) -> Result<(String, Vec<crate::types::IncludedFile>), String> {
  if depth >= MAX_INCLUDE_DEPTH {
    return Err(format!(
      "Maximum include recursion depth ({MAX_INCLUDE_DEPTH}) exceeded. This \
       likely indicates a cycle in file includes."
    ));
  }

  let mut rendered = String::new();
  let mut collected: Vec<crate::types::IncludedFile> = Vec::new();
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut remaining = markdown.lines();

  while let Some(line) = remaining.next() {
    let candidate = line.trim_start();
    let opens_include =
      !fences.in_code_block() && candidate.starts_with("```{=include=}");

    if !opens_include {
      // Ordinary line: update the fence state and pass it through.
      fences = fences.process_line(line);
      rendered.push_str(line);
      rendered.push('\n');
      continue;
    }

    let custom_output = parse_include_directive(candidate);

    // Gather the listing; `take_while` also consumes the closing fence line.
    let mut listing = String::new();
    for entry in remaining
      .by_ref()
      .take_while(|next| !next.trim_start().starts_with("```"))
    {
      listing.push_str(entry);
      listing.push('\n');
    }

    let included =
      read_includes(&listing, base_dir, custom_output, &mut collected, depth)?;
    rendered.push_str(&included);
  }

  Ok((rendered, collected))
}
229
/// Process role markup in markdown content.
///
/// This function processes role syntax like `{command}ls -la` (the role name
/// in braces followed by a backtick-quoted body) and replaces it with HTML.
/// Text inside inline code or fenced code blocks, as reported by the inline
/// tracker, is copied through untouched.
///
/// # Arguments
///
/// * `content` - The markdown content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
/// * `auto_link_options` - Whether to convert {option} roles to links
/// * `valid_options` - Optional set of valid option names for validation
///
/// # Returns
///
/// The processed markdown with role markup converted to HTML
#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap/HashSet sufficient for this use case"
)]
pub fn process_role_markup(
  content: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
  auto_link_options: bool,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  let mut result = String::with_capacity(content.len());
  let mut chars = content.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();

  while let Some(ch) = chars.next() {
    // Handle backticks (code fences and inline code)
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;

      // Add all the backticks
      result.push_str(&"`".repeat(tick_count));
      continue;
    }

    // Handle tilde code fences (~~~)
    if ch == '~' && chars.peek() == Some(&'~') {
      let (new_tracker, tilde_count) = tracker.process_tildes(&mut chars);
      tracker = new_tracker;

      result.push_str(&"~".repeat(tilde_count));
      continue;
    }

    // Handle newlines
    if ch == '\n' {
      tracker = tracker.process_newline();
      result.push(ch);
      continue;
    }

    // Process role markup only if we're not in any kind of code
    if ch == '{' && !tracker.in_any_code() {
      // Parse on a clone of the iterator: a failed parse then consumes
      // nothing, and a successful one is adopted wholesale. This advances by
      // exactly the characters parsed, which a byte-length difference would
      // get wrong for non-ASCII role content.
      let mut lookahead = chars.clone();
      if let Some(role_markup) = parse_role_markup(
        &mut lookahead,
        manpage_urls,
        auto_link_options,
        valid_options,
      ) {
        chars = lookahead;
        result.push_str(&role_markup);
        continue;
      }
    }

    // Not role markup (or inside code): keep the original character.
    result.push(ch);
  }

  result
}
318
319/// Parse a role markup from the character iterator.
320///
321/// # Returns
322///
323/// `Some(html)` if a valid role markup is found, `None` otherwise.
324fn parse_role_markup(
325  chars: &mut std::iter::Peekable<std::str::Chars>,
326  manpage_urls: Option<&std::collections::HashMap<String, String>>,
327  auto_link_options: bool,
328  valid_options: Option<&std::collections::HashSet<String>>,
329) -> Option<String> {
330  let mut role_name = String::new();
331
332  // Parse role name (lowercase letters only)
333  while let Some(&ch) = chars.peek() {
334    if ch.is_ascii_lowercase() {
335      role_name.push(ch);
336      chars.next();
337    } else {
338      break;
339    }
340  }
341
342  // Must have a non-empty role name
343  if role_name.is_empty() {
344    return None;
345  }
346
347  // Expect closing brace
348  if chars.peek() != Some(&'}') {
349    return None;
350  }
351  chars.next(); // consume '}'
352
353  // Expect opening backtick
354  if chars.peek() != Some(&'`') {
355    return None;
356  }
357  chars.next(); // consume '`'
358
359  // Parse content until closing backtick
360  let mut content = String::new();
361  for ch in chars.by_ref() {
362    if ch == '`' {
363      // Found closing backtick, validate content
364      // Most role types should not have empty content
365      if content.is_empty() && !matches!(role_name.as_str(), "manpage") {
366        return None; // reject empty content for most roles
367      }
368      return Some(format_role_markup(
369        &role_name,
370        &content,
371        manpage_urls,
372        auto_link_options,
373        valid_options,
374      ));
375    }
376    content.push(ch);
377  }
378
379  // No closing backtick found
380  None
381}
382
383/// Format the role markup as HTML based on the role type and content.
384#[must_use]
385#[allow(
386  clippy::option_if_let_else,
387  reason = "Nested options clearer with if-let"
388)]
389#[allow(
390  clippy::implicit_hasher,
391  reason = "Standard HashMap/HashSet sufficient for this use case"
392)]
393pub fn format_role_markup(
394  role_type: &str,
395  content: &str,
396  manpage_urls: Option<&std::collections::HashMap<String, String>>,
397  auto_link_options: bool,
398  valid_options: Option<&std::collections::HashSet<String>>,
399) -> String {
400  let escaped_content = html_escape::encode_text(content);
401  match role_type {
402    "manpage" => {
403      if let Some(urls) = manpage_urls {
404        if let Some(url) = urls.get(content) {
405          format!(
406            "<a href=\"{url}\" \
407             class=\"manpage-reference\">{escaped_content}</a>"
408          )
409        } else {
410          format!("<span class=\"manpage-reference\">{escaped_content}</span>")
411        }
412      } else {
413        format!("<span class=\"manpage-reference\">{escaped_content}</span>")
414      }
415    },
416    "command" => format!("<code class=\"command\">{escaped_content}</code>"),
417    "env" => format!("<code class=\"env-var\">{escaped_content}</code>"),
418    "file" => format!("<code class=\"file-path\">{escaped_content}</code>"),
419    "option" => {
420      if cfg!(feature = "ndg-flavored") && auto_link_options {
421        // Check if validation is enabled and option is valid
422        let should_link =
423          valid_options.is_none_or(|opts| opts.contains(content)); // If no validation set, link all options
424
425        if should_link {
426          let option_id = format!("option-{}", content.replace('.', "-"));
427          format!(
428            "<a class=\"option-reference\" \
429             href=\"options.html#{option_id}\"><code \
430             class=\"nixos-option\">{escaped_content}</code></a>"
431          )
432        } else {
433          format!("<code class=\"nixos-option\">{escaped_content}</code>")
434        }
435      } else {
436        format!("<code class=\"nixos-option\">{escaped_content}</code>")
437      }
438    },
439    "var" => format!("<code class=\"nix-var\">{escaped_content}</code>"),
440    _ => format!("<span class=\"{role_type}-markup\">{escaped_content}</span>"),
441  }
442}
443
444/// Process MyST-style autolinks in markdown content.
445///
446/// Converts MyST-like autolinks supported by Nixpkgs-flavored commonmark:
447/// - `[](#anchor)` -> `[](#anchor) -> {{ANCHOR}}` (placeholder for comrak)
448/// - `[](https://url)` -> `<https://url>` (converted to standard autolink)
449///
450/// # Arguments
451///
452/// * `content` - The markdown content to process
453///
454/// # Returns
455///
456/// The processed markdown with `MyST` autolinks converted as a [`String`]
457#[must_use]
458pub fn process_myst_autolinks(content: &str) -> String {
459  let mut result = String::with_capacity(content.len());
460  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
461
462  for line in content.lines() {
463    // Update fence tracking state
464    fence_tracker = fence_tracker.process_line(line);
465
466    // Only process MyST autolinks if we're not in a code block
467    if fence_tracker.in_code_block() {
468      result.push_str(line);
469    } else {
470      result.push_str(&process_line_myst_autolinks(line));
471    }
472    result.push('\n');
473  }
474
475  result
476}
477
/// Process `MyST` autolinks in a single line.
///
/// Only links with empty text (`[]`) directly followed by `(target)` are
/// rewritten:
/// - a target starting with `#` becomes `[{{ANCHOR}}](target)`
/// - a target starting with `http://` or `https://` becomes `<target>`
/// - any other target is emitted unchanged as `[](target)`
///
/// Everything else is copied verbatim, including `[]{#id}` inline anchors,
/// a bare `[]`, an empty target `[]()`, and an unterminated `[](...`.
fn process_line_myst_autolinks(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    // Only an empty link text can start an autolink.
    if ch != '[' || chars.next_if_eq(&']').is_none() {
      result.push(ch);
      continue;
    }

    // `[]` not followed by `(` is either the `[]{#...}` inline anchor
    // syntax (handled elsewhere) or plain text: restore both brackets.
    if chars.next_if_eq(&'(').is_none() {
      result.push_str("[]");
      continue;
    }

    // Collect the target up to the closing parenthesis.
    let mut url = String::new();
    let mut found_closing = false;
    for next_ch in chars.by_ref() {
      if next_ch == ')' {
        found_closing = true;
        break;
      }
      url.push(next_ch);
    }

    if !found_closing || url.is_empty() {
      // Malformed: put back exactly what was consumed.
      result.push_str("[](");
      result.push_str(&url);
      if found_closing {
        result.push(')');
      }
    } else if url.starts_with('#') {
      // Add placeholder text for comrak to parse it as a link
      let _ = write!(result, "[{{{{ANCHOR}}}}]({url})");
    } else if url.starts_with("http://") || url.starts_with("https://") {
      // Convert URL autolinks to standard <url> format
      let _ = write!(result, "<{url}>");
    } else {
      // Keep other patterns as-is
      let _ = write!(result, "[]({url})");
    }
  }

  result
}
539
540/// Process inline anchors in markdown content.
541///
542/// This function processes inline anchor syntax like `[]{#my-anchor}` while
543/// being code-block aware to avoid processing inside code fences.
544///
545/// # Arguments
546///
547/// * `content` - The markdown content to process
548///
549/// # Returns
550///
551/// The processed markdown with inline anchors converted to HTML spans
552///
553/// # Panics
554///
555/// Panics if a code fence marker line is empty (which should not occur in valid
556/// markdown).
557#[cfg(feature = "nixpkgs")]
558#[must_use]
559pub fn process_inline_anchors(content: &str) -> String {
560  let mut result = String::with_capacity(content.len() + 100);
561  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
562
563  for line in content.lines() {
564    let trimmed = line.trim_start();
565
566    // Update fence tracking state
567    fence_tracker = fence_tracker.process_line(line);
568
569    // Only process inline anchors if we're not in a code block
570    if fence_tracker.in_code_block() {
571      // In code block, keep line as-is
572      result.push_str(line);
573    } else {
574      // Check for list items with anchors:
575      // "- []{#id} content" or "1. []{#id} content"
576      if let Some(anchor_start) = find_list_item_anchor(trimmed)
577        && let Some(processed_line) =
578          process_list_item_anchor(line, anchor_start)
579      {
580        result.push_str(&processed_line);
581        result.push('\n');
582        continue;
583      }
584
585      // Process regular inline anchors in the line
586      result.push_str(&process_line_anchors(line));
587    }
588    result.push('\n');
589  }
590
591  result
592}
593
/// Detect a list marker that is immediately followed by an inline anchor.
///
/// `trimmed` is expected to have no leading whitespace. Recognized forms are
/// a bullet (`-`, `*` or `+`) plus one space, or one or more ASCII digits
/// plus `.` and one space, followed directly by `[]{#`.
///
/// Returns the byte offset within `trimmed` at which the anchor starts, or
/// `None` when the line does not have this shape.
fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
  const BULLETS: [&str; 3] = ["- ", "* ", "+ "];
  const ANCHOR_OPEN: &str = "[]{#";

  // Unordered list: "- []{#id}" or "* []{#id}" or "+ []{#id}"
  let bulleted = BULLETS.iter().any(|bullet| {
    trimmed
      .strip_prefix(*bullet)
      .is_some_and(|rest| rest.starts_with(ANCHOR_OPEN))
  });
  if bulleted {
    return Some(2);
  }

  // Ordered list: "1. []{#id}" or "123. []{#id}". The digits are ASCII, so
  // their count is also a valid byte offset into `trimmed`.
  let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
  let numbered = digits > 0
    && trimmed[digits..]
      .strip_prefix('.')
      .is_some_and(|rest| rest.starts_with(" []{#"));

  // Skip the digits, the dot and the space.
  numbered.then_some(digits + 2)
}
624
/// Replace the anchor at the start of a list item's content with a span.
///
/// `anchor_start` is the byte offset in `line` at which `[]{#id}` is expected
/// to begin. Everything before it is kept verbatim, the anchor is replaced by
/// `<span id="id" class="nixos-anchor"></span>`, and the rest of the line is
/// appended unchanged.
///
/// Returns `None` when the offset is out of range or not on a character
/// boundary, when no anchor starts there, when the anchor is not closed by
/// `}`, or when the id is empty or contains anything other than ASCII
/// alphanumerics, `-` and `_`.
fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
  // Checked slicing: a bad offset yields `None` instead of a panic.
  let before_anchor = line.get(..anchor_start)?;
  let after_marker = line.get(anchor_start..)?;

  // Split "[]{#id}rest" into the id and whatever follows the closing brace.
  let (id, remaining_content) =
    after_marker.strip_prefix("[]{#")?.split_once('}')?;

  let valid_id = !id.is_empty()
    && id
      .chars()
      .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');

  valid_id.then(|| {
    format!(
      "{before_anchor}<span id=\"{id}\" \
       class=\"nixos-anchor\"></span>{remaining_content}"
    )
  })
}
654
/// Process inline anchors in a single line.
///
/// Every `[]{#id}` whose id is non-empty and consists only of ASCII
/// alphanumerics, `-` and `_` is replaced by
/// `<span id="id" class="nixos-anchor"></span>`. All other text is copied
/// verbatim, including a bare `[]`, `[]{...}` without `#`, an empty id, an id
/// containing other characters, and an anchor left unterminated at the end
/// of the line.
fn process_line_anchors(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    // An anchor can only start at an empty bracket pair.
    if ch != '[' || chars.next_if_eq(&']').is_none() {
      result.push(ch);
      continue;
    }

    // From here on `[]` has been consumed; each early exit restores exactly
    // the characters consumed so far and lets the main loop copy the rest.
    if chars.next_if_eq(&'{').is_none() {
      result.push_str("[]");
      continue;
    }
    if chars.next_if_eq(&'#').is_none() {
      result.push_str("[]{");
      continue;
    }

    // Collect the ID: only valid ID characters are consumed.
    let mut id = String::new();
    while let Some(id_ch) =
      chars.next_if(|c| c.is_ascii_alphanumeric() || *c == '-' || *c == '_')
    {
      id.push(id_ch);
    }

    // The closing brace is only consumed for a non-empty ID.
    if !id.is_empty() && chars.next_if_eq(&'}').is_some() {
      let _ = write!(result, "<span id=\"{id}\" class=\"nixos-anchor\"></span>");
    } else {
      // Empty ID, invalid character or end of line: put back original text
      let _ = write!(result, "[]{{#{id}");
    }
  }

  result
}
718
719/// Process block elements in markdown content.
720///
721/// This function processes block elements including admonitions, figures, and
722/// definition lists while being code-block aware to avoid processing inside
723/// code fences.
724///
725/// # Arguments
726/// * `content` - The markdown content to process
727///
728/// # Returns
729/// The processed markdown with block elements converted to HTML
730///
731/// # Panics
732///
733/// Panics if a code fence marker line is empty (which should not occur in valid
734/// markdown).
735#[cfg(feature = "nixpkgs")]
736#[must_use]
737pub fn process_block_elements(content: &str) -> String {
738  let mut result = Vec::new();
739  let mut lines = content.lines().peekable();
740  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
741
742  while let Some(line) = lines.next() {
743    // Update fence tracking state
744    fence_tracker = fence_tracker.process_line(line);
745
746    // Only process block elements if we're not in a code block
747    if !fence_tracker.in_code_block() {
748      // Check for GitHub-style callouts: > [!TYPE]
749      if let Some((callout_type, initial_content)) = parse_github_callout(line)
750      {
751        let content =
752          collect_github_callout_content(&mut lines, &initial_content);
753        let admonition = render_admonition(&callout_type, None, &content);
754        result.push(admonition);
755        continue;
756      }
757
758      // Check for fenced admonitions: ::: {.type}
759      if let Some((adm_type, id)) = parse_fenced_admonition_start(line) {
760        let content = collect_fenced_content(&mut lines);
761        let admonition = render_admonition(&adm_type, id.as_deref(), &content);
762        result.push(admonition);
763        continue;
764      }
765
766      // Check for figures: ::: {.figure #id}
767      if let Some((id, title, content)) = parse_figure_block(line, &mut lines) {
768        let figure = render_figure(id.as_deref(), &title, &content);
769        result.push(figure);
770        continue;
771      }
772    }
773
774    // Regular line, keep as-is
775    result.push(line.to_string());
776  }
777
778  result.join("\n")
779}
780
/// Parse the opening line of a GitHub-style callout: `> [!TYPE] content`.
///
/// Leading whitespace is ignored. `TYPE` must be exactly one of `NOTE`,
/// `TIP`, `IMPORTANT`, `WARNING`, `CAUTION` or `DANGER` (upper case).
///
/// Returns the lower-cased type together with the trimmed text that follows
/// the closing bracket, or `None` when the line is not a callout opener.
fn parse_github_callout(line: &str) -> Option<(String, String)> {
  const KNOWN_TYPES: [&str; 6] =
    ["NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION", "DANGER"];

  let after_marker = line.trim_start().strip_prefix("> [!")?;
  // The type runs up to the first closing bracket.
  let (callout_type, rest) = after_marker.split_once(']')?;

  KNOWN_TYPES
    .contains(&callout_type)
    .then(|| (callout_type.to_lowercase(), rest.trim().to_string()))
}
806
/// Gather the body of a GitHub-style callout.
///
/// Starts with `initial_content` (if non-empty), then consumes every
/// following line that begins with `>` after optional leading whitespace,
/// stripping that marker and the whitespace after it. The first line that is
/// not a quote line is left in the iterator. The joined text is returned
/// trimmed.
fn collect_github_callout_content(
  lines: &mut std::iter::Peekable<std::str::Lines>,
  initial_content: &str,
) -> String {
  let mut body = String::new();

  if !initial_content.is_empty() {
    body.push_str(initial_content);
    body.push('\n');
  }

  // `next_if` only advances while the upcoming line is still quoted.
  while let Some(quoted) =
    lines.next_if(|next| next.trim_start().starts_with('>'))
  {
    let text = quoted
      .trim_start()
      .strip_prefix('>')
      .unwrap_or("")
      .trim_start();
    body.push_str(text);
    body.push('\n');
  }

  body.trim().to_string()
}
833
/// Parse the opening line of a fenced admonition: `::: {.type #id}`.
///
/// The line (trimmed) must start with `:::`, followed by optional whitespace
/// and an attribute block beginning with `{.` and closed by `}`. The first
/// whitespace-separated token inside the block is the type; the first token
/// starting with `#` (if any) supplies the id, without the `#`.
///
/// Returns `None` when the line does not have this shape or the attribute
/// block contains no tokens.
fn parse_fenced_admonition_start(
  line: &str,
) -> Option<(String, Option<String>)> {
  let attributes = line
    .trim()
    .strip_prefix(":::")?
    .trim_start()
    .strip_prefix("{.")?;

  // Only the text up to the first closing brace belongs to the block.
  let (inner, _) = attributes.split_once('}')?;

  let adm_type = inner.split_whitespace().next()?;
  let id = inner
    .split_whitespace()
    .find_map(|token| token.strip_prefix('#'))
    .map(str::to_string);

  Some((adm_type.to_string(), id))
}
866
/// Gather the body of a `:::` fenced block.
///
/// Consumes lines up to and including the first one that (trimmed) starts
/// with `:::`, or to the end of input if there is none. The collected lines
/// are joined with newlines and the result is trimmed.
fn collect_fenced_content(
  lines: &mut std::iter::Peekable<std::str::Lines>,
) -> String {
  let mut body = String::new();

  // `take_while` also swallows the closing `:::` line that ends the block.
  for inner in lines
    .by_ref()
    .take_while(|next| !next.trim().starts_with(":::"))
  {
    body.push_str(inner);
    body.push('\n');
  }

  body.trim().to_string()
}
883
/// Parse a figure block whose opening line is `::: {.figure #id}`.
///
/// `line` is the opening line; `lines` yields what follows it. The next line
/// must be the title, introduced by `#`. The lines after the title, up to
/// and including the closing `:::`, form the content.
///
/// Returns `(id, title, content)` on success. Returns `None` — without
/// consuming anything from `lines` — when `line` is not a figure opener or
/// the next line is not a `#` title.
fn parse_figure_block(
  line: &str,
  lines: &mut std::iter::Peekable<std::str::Lines>,
) -> Option<(Option<String>, String, String)> {
  let after_colons = line.trim().strip_prefix(":::")?.trim_start();
  if !after_colons.starts_with("{.figure") {
    return None;
  }

  // Extract ID if present: the text between `#` and the closing brace.
  let id = match (after_colons.find('#'), after_colons.find('}')) {
    (Some(hash_pos), Some(close_brace)) if hash_pos < close_brace => {
      Some(after_colons[hash_pos + 1..close_brace].trim().to_string())
    },
    _ => None,
  };

  // Peek at the title line so that a non-title line stays available to the
  // caller when this turns out not to be a well-formed figure.
  let title = lines.peek()?.trim().strip_prefix('#')?.trim().to_string();
  lines.next(); // consume the title line

  // Collect figure content up to the closing fence.
  let mut content = String::new();
  for line in lines.by_ref() {
    if line.trim().starts_with(":::") {
      break;
    }
    content.push_str(line);
    content.push('\n');
  }

  Some((id, title, content.trim().to_string()))
}
943
944/// Render an admonition as HTML
945fn render_admonition(
946  adm_type: &str,
947  id: Option<&str>,
948  content: &str,
949) -> String {
950  let capitalized_type = crate::utils::capitalize_first(adm_type);
951  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
952
953  format!(
954    "<div class=\"admonition {adm_type}\"{id_attr}>\n<p \
955     class=\"admonition-title\">{capitalized_type}</p>\n\n{content}\n\n</div>"
956  )
957}
958
/// Render a figure as an HTML `<figure>` element.
///
/// The element carries an `id` attribute when `id` is given, starts with a
/// `<figcaption>` holding `title`, and is followed by `content` on its own
/// line. `title` and `content` are inserted as-is.
fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
  let opening_tag = match id {
    Some(id) => format!("<figure id=\"{id}\">"),
    None => String::from("<figure>"),
  };

  [
    opening_tag.as_str(),
    "\n<figcaption>",
    title,
    "</figcaption>\n",
    content,
    "\n</figure>",
  ]
  .concat()
}
967
/// Process manpage references in HTML content.
///
/// The HTML is parsed into a DOM, and every `<span class="manpage-reference">`
/// whose text has an entry in `manpage_urls` is replaced by an
/// `<a class="manpage-reference">` pointing at that URL (after it has been
/// passed through `extract_url_from_html`). Spans without a matching entry
/// are left in place. The document is then serialized back to a string.
///
/// # Arguments
/// * `html` - The HTML content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
///
/// # Returns
/// The processed HTML with manpage references converted to links
#[cfg(feature = "nixpkgs")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap sufficient for this use case"
)]
pub fn process_manpage_references(
  html: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
) -> String {
  process_safe(
    html,
    |html| {
      use kuchikikiki::NodeRef;
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);

      // Pair each span that has a known URL with its replacement link.
      // The swaps are applied afterwards, once the selection is complete.
      let mut replacements = Vec::new();
      for span in safe_select(&document, "span.manpage-reference") {
        let label = span.text_contents();

        // No map, or no entry for this manpage: leave the span alone.
        let Some(url) = manpage_urls.and_then(|urls| urls.get(&label)) else {
          continue;
        };

        let clean_url = extract_url_from_html(url);
        let attributes = vec![
          (
            kuchikikiki::ExpandedName::new("", "href"),
            kuchikikiki::Attribute {
              prefix: None,
              value:  clean_url.into(),
            },
          ),
          (
            kuchikikiki::ExpandedName::new("", "class"),
            kuchikikiki::Attribute {
              prefix: None,
              value:  "manpage-reference".into(),
            },
          ),
        ];
        let link = NodeRef::new_element(
          markup5ever::QualName::new(
            None,
            markup5ever::ns!(html),
            markup5ever::local_name!("a"),
          ),
          attributes,
        );
        link.append(NodeRef::new_text(label));

        replacements.push((span, link));
      }

      // Swap every span for its link.
      for (span, link) in replacements {
        span.insert_before(link);
        span.detach();
      }

      let mut out = Vec::new();
      let _ = document.serialize(&mut out);
      String::from_utf8(out).unwrap_or_default()
    },
    // Fallback argument for `process_safe`.
    // NOTE(review): the previous comment said the original HTML is returned
    // on error; that depends on how `process_safe` treats an empty fallback
    // — confirm.
    "",
  )
}
1051
/// Link `{option}` role markup to the options page.
///
/// The input is parsed as an HTML document. Every `<code>` element carrying
/// the `nixos-option` class is replaced by
/// `<a class="option-reference" href="options.html#option-NAME"><code>…</code></a>`,
/// where `NAME` is the element's text with every `.` turned into `-`. The
/// replacement `<code>` element is created without attributes. The document
/// is then serialized back to a string.
///
/// An element is left untouched when it already sits inside an `<a>` whose
/// class contains `option-reference`, or when `valid_options` is supplied and
/// does not contain the element's text.
///
/// # Arguments
///
/// * `html` - The HTML string to process.
/// * `valid_options` - Optional set of valid option names for validation;
///   `None` links every option.
///
/// # Returns
///
/// The serialized document with option references rewritten as links.
#[cfg(feature = "ndg-flavored")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashSet sufficient for this use case"
)]
pub fn process_option_references(
  html: &str,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  use kuchikikiki::{Attribute, ExpandedName, NodeRef};
  use markup5ever::{QualName, local_name, ns};
  use tendril::TendrilSink;

  process_safe(
    html,
    |markup| {
      let document = kuchikikiki::parse_html().one(markup);
      let mut replacements = Vec::new();

      // Only code elements tagged by the {option} role syntax are candidates.
      for code_node in safe_select(&document, "code.nixos-option") {
        // Climb from the parent towards the root, looking for an enclosing
        // option-reference link.
        let inside_option_link =
          std::iter::successors(code_node.parent(), |node| node.parent()).any(
            |ancestor| {
              ancestor.as_element().is_some_and(|element| {
                element.name.local == local_name!("a")
                  && element
                    .attributes
                    .borrow()
                    .get(local_name!("class"))
                    .is_some_and(|classes| {
                      classes.contains("option-reference")
                    })
              })
            },
          );
        if inside_option_link {
          continue;
        }

        let option_name = code_node.text_contents();

        // Without a validation set every option is linked; with one, only
        // the names it contains are.
        let is_linkable = match valid_options {
          Some(known) => known.contains(option_name.as_str()),
          None => true,
        };
        if !is_linkable {
          // Leave the code element as-is (no wrapping).
          continue;
        }

        let anchor_id = format!("option-{}", option_name.replace('.', "-"));
        let link = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("a")),
          vec![
            (ExpandedName::new("", "href"), Attribute {
              prefix: None,
              value:  format!("options.html#{anchor_id}"),
            }),
            (ExpandedName::new("", "class"), Attribute {
              prefix: None,
              value:  "option-reference".into(),
            }),
          ],
        );

        let inner_code = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("code")),
          vec![],
        );
        inner_code.append(NodeRef::new_text(option_name));
        link.append(inner_code);

        replacements.push((code_node, link));
      }

      // Swap each original element for its link: insert the link in front of
      // it, then unhook the original from the tree.
      for (stale, fresh) in replacements {
        stale.insert_before(fresh);
        stale.detach();
      }

      // A serialization error is ignored, and bytes that are not valid UTF-8
      // result in an empty string.
      let mut buffer = Vec::new();
      let _ = document.serialize(&mut buffer);
      String::from_utf8(buffer).unwrap_or_default()
    },
    // Fallback argument for `process_safe`.
    // NOTE(review): an empty string presumably means "return the original
    // HTML on error" - confirm against `process_safe`.
    "",
  )
}
1155
/// Pull the link target out of an `<a href="...">` snippet.
///
/// Input that begins with exactly `<a href="` yields the text between that
/// opening quote and the next `"`. Anything else - a plain URL, an anchor
/// with other leading attributes, or an `href` value whose closing quote is
/// missing - is returned unchanged.
fn extract_url_from_html(url_or_html: &str) -> &str {
  const ANCHOR_PREFIX: &str = "<a href=\"";

  url_or_html
    .strip_prefix(ANCHOR_PREFIX)
    // Split at the quote that terminates the attribute value.
    .and_then(|after_prefix| after_prefix.split_once('"'))
    .map_or(url_or_html, |(target, _)| target)
}
ndg_commonmark/processor/extensions.rs

ndg_commonmark/processor/
extensions.rs