Skip to main content

ndg_commonmark/processor/
mod.rs

//! Markdown processing module with modular organization.
//!
//! This module provides a comprehensive, trait-based architecture for
//! processing Markdown content with support for various extensions and output
//! formats.
//!
//! # Architecture
//!
//! The processor module is organized into focused submodules:
//!
//! - [`core`]: Main processor implementation and processing pipeline
//! - [`dom`]: DOM extraction helpers
//! - [`process`]: High-level processing functions
//! - [`extensions`]: Feature-gated processing functions for different Markdown
//!   flavors
//! - [`types`]: Core type definitions and configuration structures
17pub mod core;
18pub mod dom;
19pub mod extensions;
20pub mod process;
21pub mod types;
22
23// Re-export commonly used types from submodules
24pub use core::{
25  ProcessorFeature,
26  collect_markdown_files,
27  extract_inline_text,
28  rewrite_cross_page_anchor_links,
29};
30
31// Re-export extension functions for third-party use
32#[cfg(feature = "gfm")]
33pub use extensions::apply_gfm_extensions;
34#[cfg(feature = "nixpkgs")]
35pub use extensions::process_manpage_references;
36pub use extensions::process_myst_autolinks;
37#[cfg(feature = "ndg-flavored")]
38pub use extensions::process_option_references;
39#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
40pub use extensions::process_role_markup;
41#[cfg(feature = "wiki")]
42pub use extensions::process_wikilinks;
43#[cfg(feature = "nixpkgs")]
44pub use extensions::{
45  process_block_elements,
46  process_bracketed_spans,
47  process_file_includes,
48  process_inline_anchors,
49};
50pub use process::{
51  ProcessorPreset,
52  create_processor,
53  process_batch,
54  process_markdown_file,
55  process_markdown_file_with_basedir,
56  process_markdown_string,
57  process_safe,
58  process_with_recovery,
59};
60pub use types::{
61  AstTransformer,
62  MarkdownOptions,
63  MarkdownOptionsBuilder,
64  MarkdownProcessor,
65  PromptTransformer,
66};
67
#[cfg(test)]
#[expect(clippy::expect_used, reason = "Fine in tests")]
mod tests {
  //! Tests for HTML escaping in role markup, heading id / TOC anchor
  //! agreement, and hard-tab handling inside fenced code blocks.
  //!
  //! Tests that call `extensions::format_role_markup` are gated at function
  //! level, so builds without the `nixpkgs` / `ndg-flavored` features skip
  //! them instead of reporting an empty test body as passing.

  use super::{MarkdownOptions, MarkdownProcessor, types::TabStyle};

  /// Markdown fixture: a fenced Rust block whose `println!` line is indented
  /// with a single hard tab.
  ///
  /// Written with explicit escapes so the tab is visible in the source and
  /// cannot be silently rewritten by an editor or formatter.
  const TAB_INDENTED_SNIPPET: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Markdown fixture: the same fenced block, indented with four spaces and
  /// containing no tab characters at all.
  const SPACE_INDENTED_SNIPPET: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "    println!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Markdown fixture: one line indented with a hard tab followed by one line
  /// indented with four spaces.
  const MIXED_INDENT_SNIPPET: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello\");  // tab here\n",
    "    println!(\"World\");  // spaces here\n",
    "}\n",
    "```\n",
  );

  /// Builds a processor with the given tab style and code highlighting
  /// turned off; every other option keeps its default value.
  fn processor_with_tab_style(tab_style: TabStyle) -> MarkdownProcessor {
    MarkdownProcessor::new(MarkdownOptions {
      tab_style,
      highlight_code: false,
      ..Default::default()
    })
  }

  // HTML characters in role content must be escaped in the emitted markup.
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  #[test]
  fn test_html_escaped_roles() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should escape < and > characters in content
    assert!(result.contains("&lt;name&gt;"));
    // Should not contain unescaped HTML in code content
    assert!(!result.contains("<code>hjem.users.<name>.enable</code>"));
    // Should contain escaped content in code with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));
    // Should have properly formatted option ID in href with sanitized special
    // chars to remain compatible with n-r-d
    assert!(result.contains("option-hjem.users._name_.enable"));
  }

  // Pins the escaping behaviour of the `html_escape` crate that the
  // assertions in this module rely on.
  #[test]
  fn test_html_escape_util() {
    let input = "test<>&\"'";
    let escaped = html_escape::encode_text(input);

    // `encode_text` leaves both double and single quotes untouched; only
    // `<`, `>` and `&` are rewritten.
    assert_eq!(escaped, "test&lt;&gt;&amp;\"'");
  }

  #[test]
  fn test_toc_anchor_matches_heading_id_for_angle_brackets() {
    // Regression: a heading whose text contains markup characters such as
    // `<name>` must have its table-of-contents anchor (`Header.id`) match the
    // auto-generated `id` attribute on the rendered heading, otherwise
    // "jump to header" links point at a non-existent anchor. The heading `id`
    // slugifies the escaped HTML (`&lt;name&gt;`), so the TOC must too.
    let processor = MarkdownProcessor::new(MarkdownOptions::default());
    // The Nix options renderer emits the angle brackets backslash-escaped so
    // comrak treats them as literal text rather than an inline HTML tag,
    // yielding `environments.&lt;name&gt;.deployment` in the rendered heading.
    let result = processor.render("## environments.\\<name\\>.deployment\n");

    let header = result
      .headers
      .iter()
      .find(|h| h.level == 2)
      .expect("expected an h2 header");

    // The heading id is the slug of the escaped HTML, not the raw `<name>`.
    assert_eq!(header.id, "environments--lt-name-gt--deployment");
    // The rendered HTML must carry the same id so the TOC anchor resolves.
    assert!(
      result.html.contains(&format!("id=\"{}\"", header.id)),
      "rendered HTML {:?} is missing id={:?}",
      result.html,
      header.id
    );
  }

  // A script payload must come out escaped for each of these role kinds.
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  #[test]
  fn test_various_role_types_with_html_characters() {
    let content = "<script>alert('xss')</script>";

    let command_result =
      super::extensions::format_role_markup("command", content, None, true, None);
    assert!(command_result.contains("&lt;script&gt;"));
    assert!(!command_result.contains("<script>alert"));

    let env_result =
      super::extensions::format_role_markup("env", content, None, true, None);
    assert!(env_result.contains("&lt;script&gt;"));
    assert!(!env_result.contains("<script>alert"));

    let file_result =
      super::extensions::format_role_markup("file", content, None, true, None);
    assert!(file_result.contains("&lt;script&gt;"));
    assert!(!file_result.contains("<script>alert"));
  }

  // The specific reported issue: {option}`hjem.users.<name>.enable`
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  #[test]
  fn test_option_role_escaping() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should not produce broken HTML like:
    // <code>hjem.users.<name>.enable</name></code>
    assert!(!result.contains("</name>"));

    // Should properly escape the angle brackets in display text
    assert!(result.contains("&lt;name&gt;"));

    // Should produce valid HTML structure with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));

    // Should sanitize special characters in the option ID
    assert!(result.contains("options.html#option-hjem.users._name_.enable"));
  }

  // Despite the name, angle brackets are NOT kept verbatim in the option ID:
  // they are sanitized to underscores there, while the display text keeps
  // them in HTML-escaped form.
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  #[test]
  fn test_option_role_special_chars_preserved() {
    let result = super::extensions::format_role_markup(
      "option",
      "services.foo.<bar>.enable",
      None,
      true,
      None,
    );

    // Option ID should sanitize angle brackets to underscores
    assert!(result.contains("option-services.foo._bar_.enable"));

    // Display text should be HTML escaped
    assert!(result.contains("&lt;bar&gt;"));
  }

  #[test]
  fn test_hardtab_handling_none() {
    let result =
      processor_with_tab_style(TabStyle::None).render(TAB_INDENTED_SNIPPET);
    // Hard tabs pass through untouched
    assert!(result.html.contains("\tprintln"));
  }

  #[test]
  fn test_hardtab_handling_warn() {
    let result =
      processor_with_tab_style(TabStyle::Warn).render(TAB_INDENTED_SNIPPET);
    // Should preserve hard tabs; the warning itself is not asserted here
    assert!(result.html.contains("\tprintln"));
  }

  #[test]
  fn test_hardtab_handling_normalize() {
    let result =
      processor_with_tab_style(TabStyle::Normalize).render(TAB_INDENTED_SNIPPET);
    // Should convert hard tabs to 2 spaces
    assert!(!result.html.contains("\tprintln"));
    assert!(result.html.contains("  println"));
  }

  #[test]
  fn test_hardtab_handling_no_tabs() {
    let result =
      processor_with_tab_style(TabStyle::Warn).render(SPACE_INDENTED_SNIPPET);
    // Should work fine when no tabs are present
    assert!(result.html.contains("    println"));
    assert!(!result.html.contains('\t'));
  }

  #[test]
  fn test_hardtab_handling_mixed_content() {
    let result =
      processor_with_tab_style(TabStyle::Normalize).render(MIXED_INDENT_SNIPPET);
    // Should convert only tabs, preserve spaces
    assert!(!result.html.contains("\tprintln"));
    // NOTE(review): this check is also satisfied by the four-space line
    // asserted below, so on its own it does not prove the tab became exactly
    // two spaces. Consider anchoring it to a line start once the exact
    // rendered output has been confirmed.
    assert!(result.html.contains("  println"));
    assert!(result.html.contains("    println"));
  }
}