Skip to main content

ndg_commonmark/processor/
mod.rs

1//! Markdown processing module with modular organization.
2//!
3//! This module provides a comprehensive, trait-based architecture for
4//! processing Markdown content with support for various extensions and output
5//! formats.
6//!
7//! # Architecture
8//!
9//! The processor module is organized into focused submodules:
10//!
11//! - [`core`]: Main processor implementation and processing pipeline
12//! - [`dom`]: DOM extraction helpers
13//! - [`process`]: High-level processing functions
14//! - [`extensions`]: Feature-gated processing functions for different Markdown
15//!   flavors
16//! - [`types`]: Core type definitions and configuration structures
17pub mod core;
18pub mod dom;
19pub mod extensions;
20pub mod process;
21pub mod types;
22
23// Re-export commonly used types from submodules
24pub use core::{ProcessorFeature, collect_markdown_files, extract_inline_text};
25
26// Re-export extension functions for third-party use
27#[cfg(feature = "gfm")]
28pub use extensions::apply_gfm_extensions;
29#[cfg(feature = "nixpkgs")]
30pub use extensions::process_manpage_references;
31pub use extensions::process_myst_autolinks;
32#[cfg(feature = "ndg-flavored")]
33pub use extensions::process_option_references;
34#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
35pub use extensions::process_role_markup;
36#[cfg(feature = "wiki")]
37pub use extensions::process_wikilinks;
38#[cfg(feature = "nixpkgs")]
39pub use extensions::{
40  process_block_elements,
41  process_file_includes,
42  process_inline_anchors,
43};
44pub use process::{
45  ProcessorPreset,
46  create_processor,
47  process_batch,
48  process_markdown_file,
49  process_markdown_file_with_basedir,
50  process_markdown_string,
51  process_safe,
52  process_with_recovery,
53};
54pub use types::{
55  AstTransformer,
56  MarkdownOptions,
57  MarkdownOptionsBuilder,
58  MarkdownProcessor,
59  PromptTransformer,
60};
61
#[cfg(test)]
mod tests {
  // Unit tests covering two areas of the processor:
  //
  // - HTML escaping and option-ID sanitization performed by
  //   `extensions::format_role_markup` (only compiled when the `nixpkgs` or
  //   `ndg-flavored` feature is enabled, so the tests are not reported as
  //   passing when the code under test is absent);
  // - hard-tab handling inside fenced code blocks for each `TabStyle`.

  use super::{MarkdownOptions, MarkdownProcessor, types::TabStyle};

  /// Markdown document with a fenced Rust block whose body line is indented
  /// with a single hard tab. The tab is written as `\t` so that editors and
  /// formatters cannot silently replace it.
  const TAB_INDENTED: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Same document as [`TAB_INDENTED`], but the body line is indented with
  /// four spaces and contains no tab characters at all.
  const SPACE_INDENTED: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "    println!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Document whose code block mixes one tab-indented line with one
  /// four-space-indented line.
  const MIXED_INDENTED: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello\");  // tab here\n",
    "    println!(\"World\");  // spaces here\n",
    "}\n",
    "```\n",
  );

  /// Builds a processor with the given `tab_style` and syntax highlighting
  /// disabled; every other option keeps its default value.
  fn processor_with_tab_style(tab_style: TabStyle) -> MarkdownProcessor {
    MarkdownProcessor::new(MarkdownOptions {
      tab_style,
      highlight_code: false,
      ..Default::default()
    })
  }

  /// HTML characters in role content must be escaped in the rendered markup.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_html_escaped_roles() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should escape < and > characters in content
    assert!(result.contains("&lt;name&gt;"));
    // Should not contain unescaped HTML in code content
    assert!(!result.contains("<code>hjem.users.<name>.enable</code>"));
    // Should contain escaped content in code with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));
    // Should have properly formatted option ID in href with sanitized special
    // chars to remain compatible with n-r-d
    assert!(result.contains("option-hjem.users._name_.enable"));
  }

  /// Pins the behavior of `html_escape::encode_text` that the escaping
  /// assertions in this module rely on.
  #[test]
  fn test_html_escape_util() {
    let input = "test<>&\"'";
    let escaped = html_escape::encode_text(input);

    // `encode_text` escapes `<`, `>` and `&` but leaves both quote characters
    // untouched.
    assert_eq!(escaped, "test&lt;&gt;&amp;\"'");
  }

  /// Script tags passed through the `command`, `env` and `file` roles must
  /// come out escaped.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_various_role_types_with_html_characters() {
    let content = "<script>alert('xss')</script>";

    for role in ["command", "env", "file"] {
      let markup =
        super::extensions::format_role_markup(role, content, None, true, None);

      assert!(
        markup.contains("&lt;script&gt;"),
        "role `{}` did not escape the script tag",
        role
      );
      assert!(
        !markup.contains("<script>alert"),
        "role `{}` leaked a raw script tag",
        role
      );
    }
  }

  /// Regression test for the reported issue with
  /// {option}`hjem.users.<name>.enable`.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_option_role_escaping() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should not produce broken HTML like:
    // <code>hjem.users.<name>.enable</name></code>
    assert!(!result.contains("</name>"));

    // Should properly escape the angle brackets in display text
    assert!(result.contains("&lt;name&gt;"));

    // Should produce valid HTML structure with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));

    // Should sanitize special characters in the option ID
    assert!(result.contains("options.html#option-hjem.users._name_.enable"));
  }

  /// Angle brackets in an option name are sanitized to underscores in the
  /// option ID, while the display text keeps them in HTML-escaped form.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_option_role_special_chars_preserved() {
    let result = super::extensions::format_role_markup(
      "option",
      "services.foo.<bar>.enable",
      None,
      true,
      None,
    );

    // Option ID should sanitize angle brackets to underscores
    assert!(result.contains("option-services.foo._bar_.enable"));

    // Display text should be HTML escaped
    assert!(result.contains("&lt;bar&gt;"));
  }

  /// `TabStyle::None` leaves hard tabs in the rendered code block.
  #[test]
  fn test_hardtab_handling_none() {
    let processor = processor_with_tab_style(TabStyle::None);

    let result = processor.render(TAB_INDENTED);
    assert!(result.html.contains("\tprintln"));
  }

  /// `TabStyle::Warn` leaves hard tabs in the rendered code block.
  #[test]
  fn test_hardtab_handling_warn() {
    let processor = processor_with_tab_style(TabStyle::Warn);

    let result = processor.render(TAB_INDENTED);
    // Should preserve hard tabs; any warning that is issued is not asserted
    // by this test.
    assert!(result.html.contains("\tprintln"));
  }

  /// `TabStyle::Normalize` replaces the hard tab with spaces.
  #[test]
  fn test_hardtab_handling_normalize() {
    let processor = processor_with_tab_style(TabStyle::Normalize);

    let result = processor.render(TAB_INDENTED);
    // Should convert hard tabs to 2 spaces
    assert!(!result.html.contains("\tprintln"));
    assert!(result.html.contains("  println"));
  }

  /// Space-indented input passes through unchanged when no tabs are present.
  #[test]
  fn test_hardtab_handling_no_tabs() {
    let processor = processor_with_tab_style(TabStyle::Warn);

    let result = processor.render(SPACE_INDENTED);
    // Should work fine when no tabs are present
    assert!(result.html.contains("    println"));
    assert!(!result.html.contains('\t'));
  }

  /// With mixed indentation, only the tab is converted; the four-space
  /// indentation is preserved.
  #[test]
  fn test_hardtab_handling_mixed_content() {
    let processor = processor_with_tab_style(TabStyle::Normalize);

    let result = processor.render(MIXED_INDENTED);
    // Should convert only tabs, preserve spaces
    assert!(!result.html.contains("\tprintln"));
    // NOTE(review): this check is also satisfied by the four-space line
    // below, so the tab conversion is effectively covered by the `\t` check
    // above. Tighten it once the exact HTML layout is pinned down.
    assert!(result.html.contains("  println"));
    assert!(result.html.contains("    println"));
  }
}