ndg_commonmark/processor/
extensions.rs

1//! Feature-specific Markdown processing extensions.
2use std::{fmt::Write, fs, path::Path};
3
4use html_escape;
5
6use super::process::process_safe;
7
8/// Safely select DOM elements with graceful error handling.
9fn safe_select(
10  document: &kuchikikiki::NodeRef,
11  selector: &str,
12) -> Vec<kuchikikiki::NodeRef> {
13  match document.select(selector) {
14    Ok(selections) => selections.map(|sel| sel.as_node().clone()).collect(),
15    Err(e) => {
16      log::warn!("DOM selector '{selector}' failed: {e:?}");
17      Vec::new()
18    },
19  }
20}
21
/// Hook for GitHub Flavored Markdown (GFM) specific preprocessing.
///
/// The function currently returns an owned copy of the input unchanged. It
/// exists as an extension point: GFM features are normally switched on through
/// comrak options, and any behaviour that needs custom handling (tables, task
/// lists, ...) can be implemented here later.
///
/// # Arguments
/// * `markdown` - The input markdown text
///
/// # Returns
/// The markdown text after GFM preprocessing (presently identical to the
/// input)
#[cfg(feature = "gfm")]
#[must_use]
pub fn apply_gfm_extensions(markdown: &str) -> String {
  // XXX: comrak already implements GFM. Anything in the spec that it does not
  // handle the way we want can be patched up at this point.
  String::from(markdown)
}
41
/// Upper bound on how deeply file includes may nest; reaching it is treated
/// as an error so that include cycles cannot recurse forever.
const MAX_INCLUDE_DEPTH: usize = 8;
44
/// Decide whether `path` may be used as an include target.
///
/// A path is accepted only when it is relative, contains no backslash, and
/// has no `..` component. The base directory argument is currently unused.
#[cfg(feature = "nixpkgs")]
fn is_safe_path(path: &str, _base_dir: &Path) -> bool {
  let candidate = Path::new(path);

  // Absolute paths and anything using backslashes are rejected outright.
  let looks_relative = !candidate.is_absolute() && !path.contains('\\');

  // A single parent-directory component is enough to reject the path.
  let climbs_upwards = candidate
    .components()
    .any(|part| matches!(part, std::path::Component::ParentDir));

  looks_relative && !climbs_upwards
}
63
/// Extract the value of an `html:into-file=` directive from an include fence
/// line.
///
/// The value runs from just after the `=` up to the next space. When no space
/// follows, the rest of the line (trimmed) is used. Returns `None` when the
/// directive is absent.
#[cfg(feature = "nixpkgs")]
fn parse_include_directive(line: &str) -> Option<String> {
  const DIRECTIVE: &str = "html:into-file=";

  line.find(DIRECTIVE).map(|position| {
    let value = &line[position + DIRECTIVE.len()..];
    value
      .find(' ')
      .map_or_else(|| value.trim(), |end| &value[..end])
      .to_string()
  })
}
82
/// Expand every file named in an include listing and concatenate the results.
///
/// Each non-empty line of `listing` that passes [`is_safe_path`] is resolved
/// against `base_dir`, read, and recursively run through
/// [`process_file_includes`] at `depth + 1`. Every file that was read is
/// recorded in `included_files` together with `custom_output`; files pulled in
/// by nested includes are recorded too, with their paths re-expressed relative
/// to `base_dir` (entries that cannot be expressed that way are dropped).
///
/// A file that cannot be read does not abort processing; an HTML comment
/// naming it is written into the output instead.
///
/// # Errors
///
/// Propagates the error of a nested [`process_file_includes`] call.
#[cfg(feature = "nixpkgs")]
#[allow(
  clippy::needless_pass_by_value,
  reason = "Owned value needed for cloning in loop"
)]
fn read_includes(
  listing: &str,
  base_dir: &Path,
  custom_output: Option<String>,
  included_files: &mut Vec<crate::types::IncludedFile>,
  depth: usize,
) -> Result<String, String> {
  let mut assembled = String::new();

  // Blank lines and unsafe paths are skipped without comment.
  let entries = listing
    .lines()
    .map(str::trim)
    .filter(|entry| !entry.is_empty() && is_safe_path(entry, base_dir));

  for entry in entries {
    let target = base_dir.join(entry);
    log::info!("Including file: {}", target.display());

    let Ok(raw) = fs::read_to_string(&target) else {
      let _ = writeln!(
        assembled,
        "<!-- ndg: could not include file: {} -->",
        target.display()
      );
      continue;
    };

    // Nested includes are resolved relative to the including file.
    let parent_dir = target.parent().unwrap_or(base_dir);
    let (expanded, nested) =
      process_file_includes(&raw, parent_dir, depth + 1)?;

    assembled.push_str(&expanded);
    if !expanded.ends_with('\n') {
      assembled.push('\n');
    }

    included_files.push(crate::types::IncludedFile {
      path:          entry.to_string(),
      custom_output: custom_output.clone(),
    });

    // Re-anchor nested include paths on the original base directory.
    included_files.extend(nested.into_iter().filter_map(|inner| {
      parent_dir
        .join(&inner.path)
        .strip_prefix(base_dir)
        .ok()
        .map(|relative| {
          crate::types::IncludedFile {
            path:          relative.to_string_lossy().to_string(),
            custom_output: inner.custom_output,
          }
        })
    }));
  }

  Ok(assembled)
}
144
/// Expand Nixpkgs/NixOS style file includes inside a markdown document.
///
/// An include block looks like this:
///
/// ````markdown
/// ```{=include=}
/// path/to/file1.md
/// path/to/file2.md
/// ```
/// ````
///
/// The block is replaced by the (recursively processed) contents of the
/// listed files. Include fences that appear inside a code block are left
/// untouched.
///
/// # Arguments
///
/// * `markdown` - The input markdown text
/// * `base_dir` - Directory against which the listed paths are resolved
/// * `depth` - Current recursion depth (pass 0 for the outermost call)
///
/// # Returns
///
/// `Ok((processed_markdown, included_files))`, where `included_files` lists
/// every file that was successfully included.
///
/// # Errors
///
/// Returns `Err(message)` once `depth` reaches [`MAX_INCLUDE_DEPTH`], which
/// most likely means the includes form a cycle.
///
/// # Security
///
/// Only relative paths without ".." components are followed.
#[cfg(feature = "nixpkgs")]
pub fn process_file_includes(
  markdown: &str,
  base_dir: &std::path::Path,
  depth: usize,
) -> Result<(String, Vec<crate::types::IncludedFile>), String> {
  // Refuse to recurse past the configured limit.
  if depth >= MAX_INCLUDE_DEPTH {
    return Err(format!(
      "Maximum include recursion depth ({MAX_INCLUDE_DEPTH}) exceeded. This \
       likely indicates a cycle in file includes."
    ));
  }

  let mut rendered = String::new();
  let mut collected: Vec<crate::types::IncludedFile> = Vec::new();
  let mut fences = crate::utils::codeblock::FenceTracker::new();
  let mut remaining = markdown.lines();

  while let Some(current) = remaining.next() {
    let stripped = current.trim_start();
    let opens_include =
      !fences.in_code_block() && stripped.starts_with("```{=include=}");

    if !opens_include {
      // Ordinary line: keep the fence state current and copy it through.
      fences = fences.process_line(current);
      rendered.push_str(current);
      rendered.push('\n');
      continue;
    }

    let directive = parse_include_directive(stripped);

    // Gather the listing; the closing fence line is consumed and discarded.
    let listing: String = remaining
      .by_ref()
      .take_while(|entry| !entry.trim_start().starts_with("```"))
      .flat_map(|entry| [entry, "\n"])
      .collect();

    let expanded =
      read_includes(&listing, base_dir, directive, &mut collected, depth)?;
    rendered.push_str(&expanded);
  }

  Ok((rendered, collected))
}
229
/// Process role markup in markdown content.
///
/// Role markup has the form ``{role}`content` ``, for example
/// ``{command}`ls -la` ``. Every occurrence outside of code (inline code and
/// fenced blocks, as reported by the inline tracker) is replaced by the HTML
/// produced by [`format_role_markup`]. Anything that does not parse as a role
/// is copied through unchanged.
///
/// # Arguments
///
/// * `content` - The markdown content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
/// * `auto_link_options` - Whether to convert {option} roles to links
/// * `valid_options` - Optional set of valid option names for validation
///
/// # Returns
///
/// The processed markdown with role markup converted to HTML
#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap/HashSet sufficient for this use case"
)]
pub fn process_role_markup(
  content: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
  auto_link_options: bool,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  let mut result = String::with_capacity(content.len());
  let mut chars = content.chars().peekable();
  let mut tracker = crate::utils::codeblock::InlineTracker::new();

  while let Some(ch) = chars.next() {
    // Handle backticks (code fences and inline code)
    if ch == '`' {
      let (new_tracker, tick_count) = tracker.process_backticks(&mut chars);
      tracker = new_tracker;

      // Add all the backticks
      result.push_str(&"`".repeat(tick_count));
      continue;
    }

    // Handle tilde code fences (~~~)
    if ch == '~' && chars.peek() == Some(&'~') {
      let (new_tracker, tilde_count) = tracker.process_tildes(&mut chars);
      tracker = new_tracker;

      result.push_str(&"~".repeat(tilde_count));
      continue;
    }

    // Handle newlines
    if ch == '\n' {
      tracker = tracker.process_newline();
      result.push(ch);
      continue;
    }

    // Process role markup only if we're not in any kind of code
    if ch == '{' && !tracker.in_any_code() {
      // Parse on a cheap clone of the iterator and adopt it only when the
      // parse succeeds. This keeps the position exact for multibyte text
      // (no byte/char count mix-up) and avoids copying the rest of the
      // input for every '{'.
      let mut lookahead = chars.clone();
      if let Some(role_markup) = parse_role_markup(
        &mut lookahead,
        manpage_urls,
        auto_link_options,
        valid_options,
      ) {
        chars = lookahead;
        result.push_str(&role_markup);
        continue;
      }
    }

    // Not role markup (or inside code): keep the character as-is
    result.push(ch);
  }

  result
}
318
319/// Parse a role markup from the character iterator.
320///
321/// # Returns
322///
323/// `Some(html)` if a valid role markup is found, `None` otherwise.
324fn parse_role_markup(
325  chars: &mut std::iter::Peekable<std::str::Chars>,
326  manpage_urls: Option<&std::collections::HashMap<String, String>>,
327  auto_link_options: bool,
328  valid_options: Option<&std::collections::HashSet<String>>,
329) -> Option<String> {
330  let mut role_name = String::new();
331
332  // Parse role name (lowercase letters only)
333  while let Some(&ch) = chars.peek() {
334    if ch.is_ascii_lowercase() {
335      role_name.push(ch);
336      chars.next();
337    } else {
338      break;
339    }
340  }
341
342  // Must have a non-empty role name
343  if role_name.is_empty() {
344    return None;
345  }
346
347  // Expect closing brace
348  if chars.peek() != Some(&'}') {
349    return None;
350  }
351  chars.next(); // consume '}'
352
353  // Expect opening backtick
354  if chars.peek() != Some(&'`') {
355    return None;
356  }
357  chars.next(); // consume '`'
358
359  // Parse content until closing backtick
360  let mut content = String::new();
361  for ch in chars.by_ref() {
362    if ch == '`' {
363      // Found closing backtick, validate content
364      // Most role types should not have empty content
365      if content.is_empty() && !matches!(role_name.as_str(), "manpage") {
366        return None; // reject empty content for most roles
367      }
368      return Some(format_role_markup(
369        &role_name,
370        &content,
371        manpage_urls,
372        auto_link_options,
373        valid_options,
374      ));
375    }
376    content.push(ch);
377  }
378
379  // No closing backtick found
380  None
381}
382
383/// Format the role markup as HTML based on the role type and content.
384#[must_use]
385#[allow(
386  clippy::option_if_let_else,
387  reason = "Nested options clearer with if-let"
388)]
389#[allow(
390  clippy::implicit_hasher,
391  reason = "Standard HashMap/HashSet sufficient for this use case"
392)]
393pub fn format_role_markup(
394  role_type: &str,
395  content: &str,
396  manpage_urls: Option<&std::collections::HashMap<String, String>>,
397  auto_link_options: bool,
398  valid_options: Option<&std::collections::HashSet<String>>,
399) -> String {
400  let escaped_content = html_escape::encode_text(content);
401  match role_type {
402    "manpage" => {
403      if let Some(urls) = manpage_urls {
404        if let Some(url) = urls.get(content) {
405          format!(
406            "<a href=\"{url}\" \
407             class=\"manpage-reference\">{escaped_content}</a>"
408          )
409        } else {
410          format!("<span class=\"manpage-reference\">{escaped_content}</span>")
411        }
412      } else {
413        format!("<span class=\"manpage-reference\">{escaped_content}</span>")
414      }
415    },
416    "command" => format!("<code class=\"command\">{escaped_content}</code>"),
417    "env" => format!("<code class=\"env-var\">{escaped_content}</code>"),
418    "file" => format!("<code class=\"file-path\">{escaped_content}</code>"),
419    "option" => {
420      if cfg!(feature = "ndg-flavored") && auto_link_options {
421        // Check if validation is enabled and option is valid
422        let should_link =
423          valid_options.is_none_or(|opts| opts.contains(content)); // If no validation set, link all options
424
425        if should_link {
426          let option_id = format!("option-{}", content.replace('.', "-"));
427          format!(
428            "<a class=\"option-reference\" \
429             href=\"options.html#{option_id}\"><code \
430             class=\"nixos-option\">{escaped_content}</code></a>"
431          )
432        } else {
433          format!("<code class=\"nixos-option\">{escaped_content}</code>")
434        }
435      } else {
436        format!("<code class=\"nixos-option\">{escaped_content}</code>")
437      }
438    },
439    "var" => format!("<code class=\"nix-var\">{escaped_content}</code>"),
440    _ => format!("<span class=\"{role_type}-markup\">{escaped_content}</span>"),
441  }
442}
443
444/// Process MyST-style autolinks in markdown content.
445///
446/// Converts MyST-like autolinks supported by Nixpkgs-flavored commonmark:
447/// - `[](#anchor)` -> `[](#anchor) -> {{ANCHOR}}` (placeholder for comrak)
448/// - `[](https://url)` -> `<https://url>` (converted to standard autolink)
449///
450/// # Arguments
451///
452/// * `content` - The markdown content to process
453///
454/// # Returns
455///
456/// The processed markdown with `MyST` autolinks converted as a [`String`]
457#[must_use]
458pub fn process_myst_autolinks(content: &str) -> String {
459  let mut result = String::with_capacity(content.len());
460  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();
461
462  for line in content.lines() {
463    // Update fence tracking state
464    fence_tracker = fence_tracker.process_line(line);
465
466    // Only process MyST autolinks if we're not in a code block
467    if fence_tracker.in_code_block() {
468      result.push_str(line);
469    } else {
470      result.push_str(&process_line_myst_autolinks(line));
471    }
472    result.push('\n');
473  }
474
475  result
476}
477
/// Process `MyST` autolinks in a single line.
///
/// Recognised forms:
/// - `[](#anchor)` becomes `[{{ANCHOR}}](#anchor)`
/// - `[](http://…)` / `[](https://…)` becomes `<url>`
///
/// Everything else is reproduced exactly, including a bare `[]`, the inline
/// anchor syntax `[]{#id}`, links with other targets, an empty `[]()`, and an
/// unterminated `[](…`.
fn process_line_myst_autolinks(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    // Only an empty link text "[]" is of interest.
    if ch != '[' || chars.next_if_eq(&']').is_none() {
      result.push(ch);
      continue;
    }

    // "[]" not followed by '(' is not an autolink. This covers the inline
    // anchor syntax `[]{#id}` as well as a plain `[]`; the text is restored
    // in full.
    if chars.next_if_eq(&'(').is_none() {
      result.push_str("[]");
      continue;
    }

    // Collect the link target up to the closing ')'.
    let mut url = String::new();
    let mut found_closing = false;
    for next_ch in chars.by_ref() {
      if next_ch == ')' {
        found_closing = true;
        break;
      }
      url.push(next_ch);
    }

    if !found_closing {
      // Unterminated link: restore exactly what was consumed.
      result.push_str("[](");
      result.push_str(&url);
    } else if url.starts_with('#') {
      // Add placeholder text for comrak to parse it as a link
      let _ = write!(result, "[{{{{ANCHOR}}}}]({url})");
    } else if url.starts_with("http://") || url.starts_with("https://") {
      // Convert URL autolinks to standard <url> format
      let _ = write!(result, "<{url}>");
    } else {
      // Other targets (including an empty one) are kept as written
      let _ = write!(result, "[]({url})");
    }
  }

  result
}
539
/// Process inline anchors in markdown content.
///
/// This function processes inline anchor syntax like `[]{#my-anchor}` while
/// being code-block aware to avoid processing inside code fences. Lines that
/// are list items starting with an anchor (`- []{#id} text`, `1. []{#id}
/// text`) are handled by the dedicated list-item path; all other lines go
/// through the general per-line anchor replacement.
///
/// Every output line is terminated with a newline.
///
/// # Arguments
///
/// * `content` - The markdown content to process
///
/// # Returns
///
/// The processed markdown with inline anchors converted to HTML spans
///
/// # Panics
///
/// NOTE(review): earlier documentation states this panics if a code fence
/// marker line is empty; that would originate in the fence tracker — confirm.
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_inline_anchors(content: &str) -> String {
  let mut result = String::with_capacity(content.len() + 100);
  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();

  for line in content.lines() {
    let trimmed = line.trim_start();

    // Update fence tracking state
    fence_tracker = fence_tracker.process_line(line);

    if fence_tracker.in_code_block() {
      // In code block, keep line as-is
      result.push_str(line);
    } else {
      // `find_list_item_anchor` reports an offset into the trimmed line.
      // Translate it back into an offset into `line` before slicing, so
      // indented list items are recognised and the slice stays on a
      // character boundary.
      let indent = line.len() - trimmed.len();
      let list_item = find_list_item_anchor(trimmed)
        .and_then(|offset| process_list_item_anchor(line, indent + offset));

      match list_item {
        Some(processed_line) => result.push_str(&processed_line),
        // Process regular inline anchors in the line
        None => result.push_str(&process_line_anchors(line)),
      }
    }
    result.push('\n');
  }

  result
}
594
/// Detect a list marker that is immediately followed by an inline anchor.
///
/// `trimmed` is expected to have no leading whitespace. Recognised shapes are
/// bullet items (`- []{#id}`, `* []{#id}`, `+ []{#id}`) and numbered items
/// (`1. []{#id}`, `123. []{#id}`).
///
/// Returns the byte offset within `trimmed` at which the anchor starts, or
/// `None` when the line is not such a list item.
fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
  const ANCHOR_OPEN: &str = "[]{#";

  // Bullet markers are always two bytes: the symbol and one space.
  let bulleted = ["- ", "* ", "+ "].iter().any(|marker| {
    trimmed
      .strip_prefix(*marker)
      .is_some_and(|rest| rest.starts_with(ANCHOR_OPEN))
  });
  if bulleted {
    return Some(2);
  }

  // Numbered markers: one or more ASCII digits, a dot, and one space.
  let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
  let numbered = digits > 0
    && trimmed[digits..]
      .strip_prefix(". ")
      .is_some_and(|rest| rest.starts_with(ANCHOR_OPEN));

  numbered.then_some(digits + 2)
}
625
/// Process a list item line that contains an anchor.
///
/// `anchor_start` is the byte offset in `line` at which `[]{#id}` is expected
/// to begin. When an anchor with a valid id (non-empty; ASCII alphanumerics,
/// `-` and `_` only) is found there, it is replaced by an empty
/// `<span id="…" class="nixos-anchor">` and the rest of the line is kept
/// unchanged.
///
/// Returns `None` when there is no valid anchor at that position. An offset
/// that is out of range or not on a character boundary also yields `None`
/// rather than panicking.
fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
  // Checked slicing: a bad offset must not bring the whole run down.
  let before_anchor = line.get(..anchor_start)?;
  let after_open = line.get(anchor_start..)?.strip_prefix("[]{#")?;

  // The id runs up to the first '}', the remainder is ordinary content.
  let (id, remaining_content) = after_open.split_once('}')?;

  // Validate ID contains only allowed characters
  let id_is_valid = !id.is_empty()
    && id
      .chars()
      .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');

  id_is_valid.then(|| {
    format!(
      "{before_anchor}<span id=\"{id}\" \
       class=\"nixos-anchor\"></span>{remaining_content}"
    )
  })
}
655
/// Process inline anchors in a single line.
///
/// Every `[]{#id}` whose id is non-empty and made of ASCII alphanumerics, `-`
/// or `_` is replaced by `<span id="id" class="nixos-anchor"></span>`.
///
/// Anything that merely looks like the start of an anchor is reproduced
/// exactly as written: a bare `[]`, `[]{` without `#`, an empty id, an id
/// containing other characters, or an anchor that is never closed.
fn process_line_anchors(line: &str) -> String {
  let mut result = String::with_capacity(line.len());
  let mut chars = line.chars().peekable();

  while let Some(ch) = chars.next() {
    // An anchor always starts with the empty link text "[]".
    if ch != '[' || chars.next_if_eq(&']').is_none() {
      result.push(ch);
      continue;
    }

    // Each check below consumes one more character of the "[]{#" opener; on
    // mismatch everything consumed so far is written back unchanged.
    if chars.next_if_eq(&'{').is_none() {
      result.push_str("[]");
      continue;
    }
    if chars.next_if_eq(&'#').is_none() {
      result.push_str("[]{");
      continue;
    }

    // Collect the ID: the longest run of allowed characters.
    let mut id = String::new();
    while let Some(id_ch) = chars
      .next_if(|c| c.is_ascii_alphanumeric() || *c == '-' || *c == '_')
    {
      id.push(id_ch);
    }

    // A valid anchor has a non-empty id directly followed by '}'. The '}' is
    // only consumed when the id is usable.
    if !id.is_empty() && chars.next_if_eq(&'}').is_some() {
      let _ =
        write!(result, "<span id=\"{id}\" class=\"nixos-anchor\"></span>");
    } else {
      // Not an anchor after all: restore the consumed text. Whatever follows
      // (an invalid character, '}', or nothing) is still in the iterator and
      // is copied by the main loop.
      result.push_str("[]{#");
      result.push_str(&id);
    }
  }

  result
}
719
/// Process block elements in markdown content.
///
/// This function processes block elements including admonitions, figures, and
/// GitHub-style callouts while being code-block aware to avoid processing
/// inside code fences.
///
/// Recognised blocks, in order of precedence:
/// 1. GitHub-style callouts: `> [!TYPE]`
/// 2. Figures: `::: {.figure #id}` followed by a `# title` line
/// 3. Fenced admonitions: `::: {.type #id}`
///
/// Figures are tested before generic admonitions because a figure opener also
/// matches the generic `::: {.type}` pattern; tested the other way round, a
/// figure would always be rendered as an admonition.
///
/// # Arguments
/// * `content` - The markdown content to process
///
/// # Returns
/// The processed markdown with block elements converted to HTML. Lines are
/// joined with `\n`, without a trailing newline.
///
/// # Panics
///
/// NOTE(review): earlier documentation states this panics if a code fence
/// marker line is empty; that would originate in the fence tracker — confirm.
#[cfg(feature = "nixpkgs")]
#[must_use]
pub fn process_block_elements(content: &str) -> String {
  let mut result = Vec::new();
  let mut lines = content.lines().peekable();
  let mut fence_tracker = crate::utils::codeblock::FenceTracker::new();

  while let Some(line) = lines.next() {
    // Update fence tracking state
    fence_tracker = fence_tracker.process_line(line);

    // Only process block elements if we're not in a code block
    if !fence_tracker.in_code_block() {
      // Check for GitHub-style callouts: > [!TYPE]
      if let Some((callout_type, initial_content)) = parse_github_callout(line)
      {
        let content =
          collect_github_callout_content(&mut lines, &initial_content);
        let admonition = render_admonition(&callout_type, None, &content);
        result.push(admonition);
        continue;
      }

      // Check for figures: ::: {.figure #id}
      // The figure parser reads ahead, so it is run on a copy of the
      // iterator. The copy is adopted only when a figure was parsed; a
      // failed attempt therefore never swallows lines.
      let mut probe = lines.clone();
      if let Some((id, title, content)) = parse_figure_block(line, &mut probe) {
        lines = probe;
        let figure = render_figure(id.as_deref(), &title, &content);
        result.push(figure);
        continue;
      }

      // Check for fenced admonitions: ::: {.type}
      if let Some((adm_type, id)) = parse_fenced_admonition_start(line) {
        let content = collect_fenced_content(&mut lines);
        let admonition = render_admonition(&adm_type, id.as_deref(), &content);
        result.push(admonition);
        continue;
      }
    }

    // Regular line, keep as-is
    result.push(line.to_string());
  }

  result.join("\n")
}
781
/// Recognise the first line of a GitHub-style callout: `> [!TYPE] content`.
///
/// Leading whitespace is ignored. `TYPE` must be one of `NOTE`, `TIP`,
/// `IMPORTANT`, `WARNING`, `CAUTION` or `DANGER` (upper case).
///
/// Returns the lower-cased type together with the trimmed text that follows
/// the closing bracket, or `None` when the line is not a callout.
fn parse_github_callout(line: &str) -> Option<(String, String)> {
  let after_marker = line.trim_start().strip_prefix("> [!")?;

  // Split at the first closing bracket: type on the left, text on the right.
  let (kind, tail) = after_marker.split_once(']')?;

  let is_known = matches!(
    kind,
    "NOTE" | "TIP" | "IMPORTANT" | "WARNING" | "CAUTION" | "DANGER"
  );

  is_known.then(|| (kind.to_lowercase(), tail.trim().to_string()))
}
807
/// Gather the body of a GitHub-style callout.
///
/// The body starts with `initial_content` (if not empty) and continues with
/// every directly following line that begins with `>` after optional leading
/// whitespace. The `>` and the whitespace after it are stripped. The first
/// line that is not quoted ends the callout and is left in the iterator.
///
/// Returns the collected text with surrounding whitespace trimmed.
fn collect_github_callout_content(
  lines: &mut std::iter::Peekable<std::str::Lines>,
  initial_content: &str,
) -> String {
  let mut body = String::new();

  if !initial_content.is_empty() {
    body += initial_content;
    body += "\n";
  }

  // Consume quoted lines only; anything else stays available to the caller.
  while let Some(quoted) =
    lines.next_if(|candidate| candidate.trim_start().starts_with('>'))
  {
    let text = quoted
      .trim_start()
      .strip_prefix('>')
      .unwrap_or("")
      .trim_start();
    body += text;
    body += "\n";
  }

  body.trim().to_string()
}
834
/// Recognise the opening line of a fenced admonition: `::: {.type #id}`.
///
/// The line must start with `:::` (surrounding whitespace ignored), followed
/// by an attribute block that begins with `{.` and is closed by `}`. The first
/// whitespace-separated word inside the block is the admonition type; the
/// first word starting with `#` supplies the optional id.
///
/// Returns `(type, id)` or `None` when the line is not an admonition opener.
fn parse_fenced_admonition_start(
  line: &str,
) -> Option<(String, Option<String>)> {
  let attributes = line
    .trim()
    .strip_prefix(":::")?
    .trim_start()
    .strip_prefix("{.")?;

  // Only the text up to the first closing brace is considered.
  let (inner, _) = attributes.split_once('}')?;

  let kind = inner.split_whitespace().next()?;
  let id = inner
    .split_whitespace()
    .find_map(|word| word.strip_prefix('#'))
    .map(str::to_string);

  Some((kind.to_string(), id))
}
867
/// Read lines up to the closing `:::` of a fenced block.
///
/// The closing line is consumed but not included. If the input ends first,
/// everything that remains is taken. The collected text is returned with
/// surrounding whitespace trimmed.
fn collect_fenced_content(
  lines: &mut std::iter::Peekable<std::str::Lines>,
) -> String {
  let mut body = String::new();

  let inner_lines = lines
    .by_ref()
    .take_while(|candidate| !candidate.trim().starts_with(":::"));
  for inner in inner_lines {
    body += inner;
    body += "\n";
  }

  body.trim().to_string()
}
884
/// Parse figure block: ::: {.figure #id}
///
/// `line` is the candidate opening line; `lines` yields the lines after it.
/// A figure consists of:
/// 1. the opener `::: {.figure}` with an optional `#id` before the closing
///    brace,
/// 2. a title line starting with `#`,
/// 3. content lines up to the closing `:::` (or the end of input).
///
/// Returns `(id, title, content)` with title and content trimmed, or `None`
/// when `line` does not open a figure or the next line is not a title. In the
/// `None` case nothing is consumed from `lines`, so the caller can go on to
/// treat the block as something else.
fn parse_figure_block(
  line: &str,
  lines: &mut std::iter::Peekable<std::str::Lines>,
) -> Option<(Option<String>, String, String)> {
  let after_colons = line.trim().strip_prefix(":::")?.trim_start();
  if !after_colons.starts_with("{.figure") {
    return None;
  }

  // Extract ID if present: the text between '#' and the closing brace.
  let id = after_colons
    .find('#')
    .zip(after_colons.find('}'))
    .filter(|(hash_pos, close_brace)| hash_pos < close_brace)
    .map(|(hash_pos, close_brace)| {
      after_colons[hash_pos + 1..close_brace].trim().to_string()
    });

  // The title must be on the next line and start with '#'. Peek first so a
  // non-title line is left in place instead of being dropped.
  let title_line =
    lines.next_if(|candidate| candidate.trim().starts_with('#'))?;
  let title = title_line.trim().strip_prefix('#')?.trim().to_string();

  // Collect figure content up to (and consuming) the closing fence.
  let mut content = String::new();
  for body_line in lines.by_ref() {
    if body_line.trim().starts_with(":::") {
      break;
    }
    content.push_str(body_line);
    content.push('\n');
  }

  Some((id, title, content.trim().to_string()))
}
944
945/// Render an admonition as HTML
946fn render_admonition(
947  adm_type: &str,
948  id: Option<&str>,
949  content: &str,
950) -> String {
951  let capitalized_type = crate::utils::capitalize_first(adm_type);
952  let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
953
954  format!(
955    "<div class=\"admonition {adm_type}\"{id_attr}>\n<p \
956     class=\"admonition-title\">{capitalized_type}</p>\n\n{content}\n\n</div>"
957  )
958}
959
/// Build the HTML for a figure.
///
/// Produces a `<figure>` element (with an `id` attribute when `id` is given)
/// holding a `<figcaption>` with `title`, followed by `content`. None of the
/// arguments are escaped here.
fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
  let id_attr = match id {
    Some(anchor) => format!(" id=\"{anchor}\""),
    None => String::new(),
  };

  format!(
    "<figure{id_attr}>\n<figcaption>{title}</figcaption>\n{content}\n</figure>"
  )
}
968
/// Turn manpage reference spans into links.
///
/// The HTML is parsed into a DOM, and every `<span class="manpage-reference">`
/// whose text content is a key of `manpage_urls` is replaced by an
/// `<a class="manpage-reference">` pointing at the mapped URL (after it has
/// been passed through `extract_url_from_html`). Spans without a matching
/// entry, and all spans when no map is given, are left unchanged. The document
/// is then serialized back to a string.
///
/// # Arguments
/// * `html` - The HTML content to process
/// * `manpage_urls` - Optional mapping of manpage names to URLs
///
/// # Returns
/// The processed HTML with manpage references converted to links
#[cfg(feature = "nixpkgs")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashMap sufficient for this use case"
)]
pub fn process_manpage_references(
  html: &str,
  manpage_urls: Option<&std::collections::HashMap<String, String>>,
) -> String {
  process_safe(
    html,
    |html| {
      use kuchikikiki::NodeRef;
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);

      // Build every replacement first; the tree is only modified afterwards.
      let replacements: Vec<(NodeRef, NodeRef)> =
        safe_select(&document, "span.manpage-reference")
          .into_iter()
          .filter_map(|span| {
            let label = span.text_contents();

            // Only spans with a direct URL match are converted.
            let target = manpage_urls?.get(&label)?;

            let href = kuchikikiki::Attribute {
              prefix: None,
              value:  extract_url_from_html(target).into(),
            };
            let class = kuchikikiki::Attribute {
              prefix: None,
              value:  "manpage-reference".into(),
            };

            let anchor = NodeRef::new_element(
              markup5ever::QualName::new(
                None,
                markup5ever::ns!(html),
                markup5ever::local_name!("a"),
              ),
              vec![
                (kuchikikiki::ExpandedName::new("", "href"), href),
                (kuchikikiki::ExpandedName::new("", "class"), class),
              ],
            );
            anchor.append(NodeRef::new_text(label));

            Some((span, anchor))
          })
          .collect();

      // Swap each span for its link.
      for (span, anchor) in replacements {
        span.insert_before(anchor);
        span.detach();
      }

      let mut serialized = Vec::new();
      document.serialize(&mut serialized).ok();
      String::from_utf8(serialized).unwrap_or_default()
    },
    // Fallback handed to `process_safe` for the error case.
    // NOTE(review): the intent is to return the original HTML on error;
    // confirm that `process_safe` treats an empty fallback that way.
    "",
  )
}
1052
/// Convert `{option}` role markup into links to the options page.
///
/// Every `<code class="nixos-option">` element in the parsed document is
/// replaced by `<a class="option-reference" href="options.html#option-...">`
/// wrapping a fresh `<code>` element with the same text. The anchor id is the
/// option name with each `.` replaced by `-`, prefixed with `option-`.
///
/// An element is left as-is when it already sits inside an `<a>` whose class
/// attribute contains `option-reference`, or when `valid_options` is provided
/// and does not contain the element's text.
///
/// # Arguments
///
/// * `html` - The HTML string to process.
/// * `valid_options` - Optional set of valid option names; `None` links every
///   option reference.
///
/// # Returns
///
/// The serialized document with option references rewritten as links. A
/// serialization error is ignored, and output that is not valid UTF-8 yields
/// an empty string.
#[cfg(feature = "ndg-flavored")]
#[must_use]
#[allow(
  clippy::implicit_hasher,
  reason = "Standard HashSet sufficient for this use case"
)]
pub fn process_option_references(
  html: &str,
  valid_options: Option<&std::collections::HashSet<String>>,
) -> String {
  use kuchikikiki::{Attribute, ExpandedName, NodeRef};
  use markup5ever::{QualName, local_name, ns};
  use tendril::TendrilSink;

  process_safe(
    html,
    |html| {
      let document = kuchikikiki::parse_html().one(html);

      // Collected first, applied afterwards, so the tree is not modified
      // while the matched elements are still being walked.
      let mut replacements = Vec::new();

      // Only code elements that carry the nixos-option class (from {option}
      // role syntax) are considered.
      for option_code in safe_select(&document, "code.nixos-option") {
        let option_name = option_code.text_contents();

        // Walk up from the parent looking for an enclosing option-reference
        // link; such elements have already been processed.
        let inside_option_link =
          std::iter::successors(option_code.parent(), |node| node.parent())
            .any(|ancestor| {
              ancestor.as_element().is_some_and(|element| {
                element.name.local == local_name!("a")
                  && element
                    .attributes
                    .borrow()
                    .get(local_name!("class"))
                    .is_some_and(|classes| classes.contains("option-reference"))
              })
            });
        if inside_option_link {
          continue;
        }

        // Without a validation set every option is linked; with one, unknown
        // options keep their plain code element.
        let is_linkable = valid_options
          .is_none_or(|known| known.contains(option_name.as_str()));
        if !is_linkable {
          continue;
        }

        let anchor_id = format!("option-{}", option_name.replace('.', "-"));
        let link = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("a")),
          vec![
            (ExpandedName::new("", "href"), Attribute {
              prefix: None,
              value:  format!("options.html#{anchor_id}"),
            }),
            (ExpandedName::new("", "class"), Attribute {
              prefix: None,
              value:  "option-reference".into(),
            }),
          ],
        );
        let inner_code = NodeRef::new_element(
          QualName::new(None, ns!(html), local_name!("code")),
          vec![],
        );
        inner_code.append(NodeRef::new_text(option_name));
        link.append(inner_code);

        replacements.push((option_code, link));
      }

      // Swap each original code element for its link in place.
      for (option_code, link) in replacements {
        option_code.insert_before(link);
        option_code.detach();
      }

      let mut serialized = Vec::new();
      document.serialize(&mut serialized).ok();
      String::from_utf8(serialized).unwrap_or_default()
    },
    // Fallback value handed to `process_safe`.
    // NOTE(review): presumably an empty fallback makes `process_safe` return
    // the original HTML on error - confirm against its implementation.
    "",
  )
}
1159
/// Pull the URL out of an HTML anchor snippet, or pass a plain URL through.
///
/// When the input begins with `<a href="`, the text up to the next `"` is
/// returned. In every other case, including an anchor whose `href` value is
/// never closed by a quote, the input is returned unchanged.
fn extract_url_from_html(url_or_html: &str) -> &str {
  url_or_html
    .strip_prefix("<a href=\"")
    // The href value ends at the first closing quote after the prefix.
    .and_then(|after_prefix| after_prefix.split_once('"'))
    .map_or(url_or_html, |(href, _)| href)
}
ndg_commonmark/processor/extensions.rs

ndg_commonmark/processor/
extensions.rs