Skip to main content

ndg_commonmark/processor/
mod.rs

1//! Markdown processing module with modular organization.
2//!
3//! This module provides a comprehensive, trait-based architecture for
4//! processing Markdown content with support for various extensions and output
5//! formats.
6//!
7//! # Architecture
8//!
9//! The processor module is organized into focused submodules:
10//!
11//! - [`core`]: Main processor implementation and processing pipeline
12//! - [`dom`]: DOM extraction helpers
13//! - [`process`]: High-level processing functions
14//! - [`extensions`]: Feature-gated processing functions for different Markdown
15//!   flavors
16//! - [`types`]: Core type definitions and configuration structures
17pub mod core;
18pub mod dom;
19pub mod extensions;
20pub mod process;
21pub mod types;
22
23// Re-export commonly used types from submodules
24pub use core::{ProcessorFeature, collect_markdown_files, extract_inline_text};
25
26// Re-export extension functions for third-party use
27#[cfg(feature = "gfm")]
28pub use extensions::apply_gfm_extensions;
29#[cfg(feature = "nixpkgs")]
30pub use extensions::process_manpage_references;
31pub use extensions::process_myst_autolinks;
32#[cfg(feature = "ndg-flavored")]
33pub use extensions::process_option_references;
34#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
35pub use extensions::process_role_markup;
36#[cfg(feature = "wiki")]
37pub use extensions::process_wikilinks;
38#[cfg(feature = "nixpkgs")]
39pub use extensions::{
40  process_block_elements,
41  process_bracketed_spans,
42  process_file_includes,
43  process_inline_anchors,
44};
45pub use process::{
46  ProcessorPreset,
47  create_processor,
48  process_batch,
49  process_markdown_file,
50  process_markdown_file_with_basedir,
51  process_markdown_string,
52  process_safe,
53  process_with_recovery,
54};
55pub use types::{
56  AstTransformer,
57  MarkdownOptions,
58  MarkdownOptionsBuilder,
59  MarkdownProcessor,
60  PromptTransformer,
61};
62
63#[cfg(test)]
64mod tests {
65  use html_escape;
66
67  use super::{MarkdownOptions, MarkdownProcessor, types::TabStyle};
68
69  #[test]
70  fn test_html_escaped_roles() {
71    // Test that HTML characters in role content are properly escaped
72    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
73    {
74      let result = super::extensions::format_role_markup(
75        "option",
76        "hjem.users.<name>.enable",
77        None,
78        true,
79        None,
80      );
81
82      // Should escape < and > characters in content
83      assert!(result.contains("&lt;name&gt;"));
84      // Should not contain unescaped HTML in code content
85      assert!(!result.contains("<code>hjem.users.<name>.enable</code>"));
86      // Should contain escaped content in code with proper class
87      assert!(result.contains(
88        "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
89      ));
90      // Should have properly formatted option ID in href with sanitized special
91      // chars to remain compatible with n-r-d
92      assert!(result.contains("option-hjem.users._name_.enable"));
93    }
94  }
95
96  #[test]
97  fn test_html_escape_util() {
98    let input = "test<>&\"'";
99    let escaped = html_escape::encode_text(input);
100
101    // html-escape crate doesn't escape single quotes by default
102    assert_eq!(escaped, "test&lt;&gt;&amp;\"'");
103  }
104
105  #[test]
106  fn test_toc_anchor_matches_heading_id_for_angle_brackets() {
107    // Regression: a heading whose text contains markup characters such as
108    // `<name>` must have its table-of-contents anchor (`Header.id`) match the
109    // auto-generated `id` attribute on the rendered heading, otherwise
110    // "jump to header" links point at a non-existent anchor. The heading `id`
111    // slugifies the escaped HTML (`&lt;name&gt;`), so the TOC must too.
112    let processor = MarkdownProcessor::new(MarkdownOptions::default());
113    // The Nix options renderer emits the angle brackets backslash-escaped so
114    // comrak treats them as literal text rather than an inline HTML tag,
115    // yielding `environments.&lt;name&gt;.deployment` in the rendered heading.
116    let result = processor.render("## environments.\\<name\\>.deployment\n");
117
118    let header = result
119      .headers
120      .iter()
121      .find(|h| h.level == 2)
122      .expect("expected an h2 header");
123
124    // The heading id is the slug of the escaped HTML, not the raw `<name>`.
125    assert_eq!(header.id, "environments--lt-name-gt--deployment");
126    // The rendered HTML must carry the same id so the TOC anchor resolves.
127    assert!(
128      result.html.contains(&format!("id=\"{}\"", header.id)),
129      "rendered HTML {:?} is missing id={:?}",
130      result.html,
131      header.id
132    );
133  }
134
135  #[test]
136  fn test_various_role_types_with_html_characters() {
137    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
138    {
139      let content = "<script>alert('xss')</script>";
140
141      let command_result = super::extensions::format_role_markup(
142        "command", content, None, true, None,
143      );
144      assert!(command_result.contains("&lt;script&gt;"));
145      assert!(!command_result.contains("<script>alert"));
146
147      let env_result =
148        super::extensions::format_role_markup("env", content, None, true, None);
149      assert!(env_result.contains("&lt;script&gt;"));
150      assert!(!env_result.contains("<script>alert"));
151
152      let file_result = super::extensions::format_role_markup(
153        "file", content, None, true, None,
154      );
155      assert!(file_result.contains("&lt;script&gt;"));
156      assert!(!file_result.contains("<script>alert"));
157    }
158  }
159
160  #[test]
161  fn test_option_role_escaping() {
162    // Test the specific reported issue: {option}`hjem.users.<name>.enable`
163    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
164    {
165      let result = super::extensions::format_role_markup(
166        "option",
167        "hjem.users.<name>.enable",
168        None,
169        true,
170        None,
171      );
172
173      // Should not produce broken HTML like:
174      // <code>hjem.users.<name>.enable</name></code>
175      assert!(!result.contains("</name>"));
176
177      // Should properly escape the angle brackets in display text
178      assert!(result.contains("&lt;name&gt;"));
179
180      // Should produce valid HTML structure with proper class
181      assert!(result.contains(
182        "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
183      ));
184
185      // Should sanitize special characters in the option ID
186      assert!(result.contains("options.html#option-hjem.users._name_.enable"));
187    }
188  }
189
190  #[test]
191  fn test_option_role_special_chars_preserved() {
192    // Test that special characters are preserved in option IDs
193    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
194    {
195      let result = super::extensions::format_role_markup(
196        "option",
197        "services.foo.<bar>.enable",
198        None,
199        true,
200        None,
201      );
202
203      // Option ID should sanitize angle brackets to underscores
204      assert!(result.contains("option-services.foo._bar_.enable"));
205
206      // Display text should be HTML escaped
207      assert!(result.contains("&lt;bar&gt;"));
208    }
209  }
210
211  #[test]
212  fn test_hardtab_handling_none() {
213    let options = MarkdownOptions {
214      tab_style: TabStyle::None,
215      highlight_code: false,
216      ..Default::default()
217    };
218    let processor = MarkdownProcessor::new(options);
219
220    let markdown = r#"
221# Test Code
222
223```rust
224fn main() {
225	println!("Hello, world!");
226}
227```
228"#;
229
230    let result = processor.render(markdown);
231    assert!(result.html.contains("\tprintln"));
232  }
233
234  #[test]
235  fn test_hardtab_handling_warn() {
236    let options = MarkdownOptions {
237      tab_style: TabStyle::Warn,
238      highlight_code: false,
239      ..Default::default()
240    };
241    let processor = MarkdownProcessor::new(options);
242
243    let markdown = r#"
244# Test Code
245
246```rust
247fn main() {
248	println!("Hello, world!");
249}
250```
251"#;
252
253    let result = processor.render(markdown);
254    // Should preserve hard tabs but issue warning
255    assert!(result.html.contains("\tprintln"));
256  }
257
258  #[test]
259  fn test_hardtab_handling_normalize() {
260    let options = MarkdownOptions {
261      tab_style: TabStyle::Normalize,
262      highlight_code: false,
263      ..Default::default()
264    };
265    let processor = MarkdownProcessor::new(options);
266
267    let markdown = r#"
268# Test Code
269
270```rust
271fn main() {
272	println!("Hello, world!");
273}
274```
275"#;
276
277    let result = processor.render(markdown);
278    // Should convert hard tabs to 2 spaces
279    assert!(!result.html.contains("\tprintln"));
280    assert!(result.html.contains("  println"));
281  }
282
283  #[test]
284  fn test_hardtab_handling_no_tabs() {
285    let options = MarkdownOptions {
286      tab_style: TabStyle::Warn,
287      highlight_code: false,
288      ..Default::default()
289    };
290    let processor = MarkdownProcessor::new(options);
291
292    let markdown = r#"
293# Test Code
294
295```rust
296fn main() {
297    println!("Hello, world!");
298}
299```
300"#;
301
302    let result = processor.render(markdown);
303    // Should work fine when no tabs are present
304    assert!(result.html.contains("    println"));
305    assert!(!result.html.contains('\t'));
306  }
307
308  #[test]
309  fn test_hardtab_handling_mixed_content() {
310    let options = MarkdownOptions {
311      tab_style: TabStyle::Normalize,
312      highlight_code: false,
313      ..Default::default()
314    };
315    let processor = MarkdownProcessor::new(options);
316
317    let markdown = r#"
318# Test Code
319
320```rust
321fn main() {
322	println!("Hello");  // tab here
323    println!("World");  // spaces here
324}
325```
326"#;
327
328    let result = processor.render(markdown);
329    // Should convert only tabs, preserve spaces
330    assert!(!result.html.contains("\tprintln"));
331    assert!(result.html.contains("  println"));
332    assert!(result.html.contains("    println"));
333  }
334}