Skip to main content

ndg_commonmark/processor/
mod.rs

1//! Markdown processing module with modular organization.
2//!
3//! This module provides a comprehensive, trait-based architecture for
4//! processing Markdown content with support for various extensions and output
5//! formats.
6//!
7//! # Architecture
8//!
9//! The processor module is organized into focused submodules:
10//!
11//! - [`core`]: Main processor implementation and processing pipeline
12//! - [`dom`]: DOM extraction helpers
13//! - [`process`]: High-level processing functions
14//! - [`extensions`]: Feature-gated processing functions for different Markdown
15//!   flavors
16//! - [`types`]: Core type definitions and configuration structures
17pub mod core;
18pub mod dom;
19pub mod extensions;
20pub mod process;
21pub mod types;
22
23// Re-export commonly used types from submodules
24pub use core::{ProcessorFeature, collect_markdown_files, extract_inline_text};
25
26// Re-export extension functions for third-party use
27#[cfg(feature = "gfm")]
28pub use extensions::apply_gfm_extensions;
29#[cfg(feature = "nixpkgs")]
30pub use extensions::process_manpage_references;
31pub use extensions::process_myst_autolinks;
32#[cfg(feature = "ndg-flavored")]
33pub use extensions::process_option_references;
34#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
35pub use extensions::process_role_markup;
36#[cfg(feature = "wiki")]
37pub use extensions::process_wikilinks;
38#[cfg(feature = "nixpkgs")]
39pub use extensions::{
40  process_block_elements,
41  process_bracketed_spans,
42  process_file_includes,
43  process_inline_anchors,
44};
45pub use process::{
46  ProcessorPreset,
47  create_processor,
48  process_batch,
49  process_markdown_file,
50  process_markdown_file_with_basedir,
51  process_markdown_string,
52  process_safe,
53  process_with_recovery,
54};
55pub use types::{
56  AstTransformer,
57  MarkdownOptions,
58  MarkdownOptionsBuilder,
59  MarkdownProcessor,
60  PromptTransformer,
61};
62
#[cfg(test)]
mod tests {
  use super::{MarkdownOptions, MarkdownProcessor, types::TabStyle};

  /// Fenced Rust snippet whose `println!` line is indented with one hard tab.
  ///
  /// The tab is spelled as an explicit `\t` escape so that editors and
  /// formatters cannot silently replace it with spaces.
  const TABBED_SAMPLE: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Fenced Rust snippet indented with four spaces and containing no tabs.
  const SPACES_SAMPLE: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "    println!(\"Hello, world!\");\n",
    "}\n",
    "```\n",
  );

  /// Fenced Rust snippet with one tab-indented and one space-indented line.
  const MIXED_SAMPLE: &str = concat!(
    "\n",
    "# Test Code\n",
    "\n",
    "```rust\n",
    "fn main() {\n",
    "\tprintln!(\"Hello\");  // tab here\n",
    "    println!(\"World\");  // spaces here\n",
    "}\n",
    "```\n",
  );

  /// Builds a processor that uses `tab_style`, has code highlighting turned
  /// off, and keeps every other option at its default value.
  fn processor_with_tab_style(tab_style: TabStyle) -> MarkdownProcessor {
    MarkdownProcessor::new(MarkdownOptions {
      tab_style,
      highlight_code: false,
      ..Default::default()
    })
  }

  /// HTML characters in role content must be escaped in the rendered markup.
  // The feature gate sits on the function (not on an inner block) so the test
  // is absent, rather than vacuously passing, when the features are disabled.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_html_escaped_roles() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should escape < and > characters in content
    assert!(result.contains("&lt;name&gt;"));
    // Should not contain unescaped HTML in code content
    assert!(!result.contains("<code>hjem.users.<name>.enable</code>"));
    // Should contain escaped content in code with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));
    // Should have properly formatted option ID in href with sanitized special
    // chars to remain compatible with n-r-d
    assert!(result.contains("option-hjem.users._name_.enable"));
  }

  /// Pins the behaviour of `html_escape::encode_text` that the role escaping
  /// relies on.
  #[test]
  fn test_html_escape_util() {
    let input = "test<>&\"'";
    let escaped = html_escape::encode_text(input);

    // `encode_text` leaves both double and single quotes untouched
    assert_eq!(escaped, "test&lt;&gt;&amp;\"'");
  }

  /// Script-like content must be escaped for every plain-text role type.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_various_role_types_with_html_characters() {
    let content = "<script>alert('xss')</script>";

    for role in ["command", "env", "file"] {
      let result =
        super::extensions::format_role_markup(role, content, None, true, None);

      assert!(
        result.contains("&lt;script&gt;"),
        "role `{role}` did not escape its content: {result}"
      );
      assert!(
        !result.contains("<script>alert"),
        "role `{role}` emitted a raw <script> tag: {result}"
      );
    }
  }

  /// Regression test for the reported issue with
  /// {option}`hjem.users.<name>.enable`.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_option_role_escaping() {
    let result = super::extensions::format_role_markup(
      "option",
      "hjem.users.<name>.enable",
      None,
      true,
      None,
    );

    // Should not produce broken HTML like:
    // <code>hjem.users.<name>.enable</name></code>
    assert!(!result.contains("</name>"));

    // Should properly escape the angle brackets in display text
    assert!(result.contains("&lt;name&gt;"));

    // Should produce valid HTML structure with proper class
    assert!(result.contains(
      "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
    ));

    // Should sanitize special characters in the option ID
    assert!(result.contains("options.html#option-hjem.users._name_.enable"));
  }

  /// Angle brackets are sanitized to underscores in the option ID while the
  /// display text keeps them in HTML-escaped form.
  // NOTE: the test name says "preserved", but the assertions below check that
  // the ID is sanitized; the name is kept so existing test filters still match.
  #[test]
  #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
  fn test_option_role_special_chars_preserved() {
    let result = super::extensions::format_role_markup(
      "option",
      "services.foo.<bar>.enable",
      None,
      true,
      None,
    );

    // Option ID should sanitize angle brackets to underscores
    assert!(result.contains("option-services.foo._bar_.enable"));

    // Display text should be HTML escaped
    assert!(result.contains("&lt;bar&gt;"));
  }

  /// `TabStyle::None` leaves hard tabs in code blocks untouched.
  #[test]
  fn test_hardtab_handling_none() {
    let processor = processor_with_tab_style(TabStyle::None);

    let result = processor.render(TABBED_SAMPLE);
    assert!(result.html.contains("\tprintln"));
  }

  /// `TabStyle::Warn` keeps hard tabs in the rendered output.
  #[test]
  fn test_hardtab_handling_warn() {
    let processor = processor_with_tab_style(TabStyle::Warn);

    let result = processor.render(TABBED_SAMPLE);
    // Should preserve hard tabs but issue warning (the warning itself is not
    // observable from this test)
    assert!(result.html.contains("\tprintln"));
  }

  /// `TabStyle::Normalize` replaces hard tabs with spaces.
  #[test]
  fn test_hardtab_handling_normalize() {
    let processor = processor_with_tab_style(TabStyle::Normalize);

    let result = processor.render(TABBED_SAMPLE);
    // Should convert hard tabs to 2 spaces
    assert!(!result.html.contains("\tprintln"));
    assert!(result.html.contains("  println"));
  }

  /// Input without tabs renders unchanged and gains no tab characters.
  #[test]
  fn test_hardtab_handling_no_tabs() {
    let processor = processor_with_tab_style(TabStyle::Warn);

    let result = processor.render(SPACES_SAMPLE);
    // Should work fine when no tabs are present
    assert!(result.html.contains("    println"));
    assert!(!result.html.contains('\t'));
  }

  /// Normalization converts tab indentation only; space indentation stays.
  #[test]
  fn test_hardtab_handling_mixed_content() {
    let processor = processor_with_tab_style(TabStyle::Normalize);

    let result = processor.render(MIXED_SAMPLE);
    // Should convert only tabs, preserve spaces
    assert!(!result.html.contains("\tprintln"));
    // NOTE: "  println" is also a substring of the four-space line, so this
    // is a loose check; the tab assertion above is the decisive one.
    assert!(result.html.contains("  println"));
    assert!(result.html.contains("    println"));
  }
}