ndg_commonmark/processor/
mod.rs

1//! Markdown processing module with modular organization.
2//!
3//! This module provides a comprehensive, trait-based architecture for
4//! processing Markdown content with support for various extensions and output
5//! formats.
6//!
7//! # Architecture
8//!
9//! The processor module is organized into focused submodules:
10//!
11//! - [`core`]: Main processor implementation and processing pipeline
12//! - [`process`]: High-level processing functions with error recovery
13//! - [`extensions`]: Feature-gated processing functions for different Markdown
14//!   flavors
15//! - [`types`]: Core type definitions and configuration structures
16pub mod core;
17pub mod extensions;
18pub mod process;
19pub mod types;
20
21// Re-export commonly used types from submodules
22pub use core::{ProcessorFeature, collect_markdown_files, extract_inline_text};
23
24// Re-export extension functions for third-party use
25#[cfg(feature = "gfm")]
26pub use extensions::apply_gfm_extensions;
27#[cfg(feature = "nixpkgs")]
28pub use extensions::process_manpage_references;
29pub use extensions::process_myst_autolinks;
30#[cfg(feature = "ndg-flavored")]
31pub use extensions::process_option_references;
32#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
33pub use extensions::process_role_markup;
34#[cfg(feature = "nixpkgs")]
35pub use extensions::{
36  process_block_elements,
37  process_file_includes,
38  process_inline_anchors,
39};
40pub use process::{
41  ProcessorPreset,
42  create_processor,
43  process_batch,
44  process_markdown_file,
45  process_markdown_file_with_basedir,
46  process_markdown_string,
47  process_safe,
48  process_with_recovery,
49};
50pub use types::{
51  AstTransformer,
52  MarkdownOptions,
53  MarkdownOptionsBuilder,
54  MarkdownProcessor,
55  PromptTransformer,
56};
57
58#[cfg(test)]
59mod tests {
60  use html_escape;
61
62  use super::{MarkdownOptions, MarkdownProcessor, types::TabStyle};
63
64  #[test]
65  fn test_html_escaped_roles() {
66    // Test that HTML characters in role content are properly escaped
67    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
68    {
69      let result = super::extensions::format_role_markup(
70        "option",
71        "hjem.users.<name>.enable",
72        None,
73        true,
74        None,
75      );
76
77      // Should escape < and > characters in content
78      assert!(result.contains("&lt;name&gt;"));
79      // Should not contain unescaped HTML in code content
80      assert!(!result.contains("<code>hjem.users.<name>.enable</code>"));
81      // Should contain escaped content in code with proper class
82      assert!(result.contains(
83        "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
84      ));
85      // Should have properly formatted option ID in href with preserved special
86      // chars
87      assert!(result.contains("option-hjem-users-<name>-enable"));
88    }
89  }
90
91  #[test]
92  fn test_html_escape_util() {
93    let input = "test<>&\"'";
94    let escaped = html_escape::encode_text(input);
95
96    // html-escape crate doesn't escape single quotes by default
97    assert_eq!(escaped, "test&lt;&gt;&amp;\"'");
98  }
99
100  #[test]
101  fn test_various_role_types_with_html_characters() {
102    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
103    {
104      let content = "<script>alert('xss')</script>";
105
106      let command_result = super::extensions::format_role_markup(
107        "command", content, None, true, None,
108      );
109      assert!(command_result.contains("&lt;script&gt;"));
110      assert!(!command_result.contains("<script>alert"));
111
112      let env_result =
113        super::extensions::format_role_markup("env", content, None, true, None);
114      assert!(env_result.contains("&lt;script&gt;"));
115      assert!(!env_result.contains("<script>alert"));
116
117      let file_result = super::extensions::format_role_markup(
118        "file", content, None, true, None,
119      );
120      assert!(file_result.contains("&lt;script&gt;"));
121      assert!(!file_result.contains("<script>alert"));
122    }
123  }
124
125  #[test]
126  fn test_option_role_escaping() {
127    // Test the specific reported issue: {option}`hjem.users.<name>.enable`
128    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
129    {
130      let result = super::extensions::format_role_markup(
131        "option",
132        "hjem.users.<name>.enable",
133        None,
134        true,
135        None,
136      );
137
138      // Should not produce broken HTML like:
139      // <code>hjem.users.<name>.enable</name></code>
140      assert!(!result.contains("</name>"));
141
142      // Should properly escape the angle brackets in display text
143      assert!(result.contains("&lt;name&gt;"));
144
145      // Should produce valid HTML structure with proper class
146      assert!(result.contains(
147        "<code class=\"nixos-option\">hjem.users.&lt;name&gt;.enable</code>"
148      ));
149
150      // Should preserve special characters in the option ID (the actual anchor)
151      assert!(result.contains("options.html#option-hjem-users-<name>-enable"));
152    }
153  }
154
155  #[test]
156  fn test_option_role_special_chars_preserved() {
157    // Test that special characters are preserved in option IDs
158    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
159    {
160      let result = super::extensions::format_role_markup(
161        "option",
162        "services.foo.<bar>.enable",
163        None,
164        true,
165        None,
166      );
167
168      // Option ID should preserve angle brackets
169      assert!(result.contains("option-services-foo-<bar>-enable"));
170
171      // Display text should be HTML escaped
172      assert!(result.contains("&lt;bar&gt;"));
173    }
174  }
175
176  #[test]
177  fn test_hardtab_handling_none() {
178    let options = MarkdownOptions {
179      tab_style: TabStyle::None,
180      highlight_code: false,
181      ..Default::default()
182    };
183    let processor = MarkdownProcessor::new(options);
184
185    let markdown = r#"
186# Test Code
187
188```rust
189fn main() {
190	println!("Hello, world!");
191}
192```
193"#;
194
195    let result = processor.render(markdown);
196    assert!(result.html.contains("\tprintln"));
197  }
198
199  #[test]
200  fn test_hardtab_handling_warn() {
201    let options = MarkdownOptions {
202      tab_style: TabStyle::Warn,
203      highlight_code: false,
204      ..Default::default()
205    };
206    let processor = MarkdownProcessor::new(options);
207
208    let markdown = r#"
209# Test Code
210
211```rust
212fn main() {
213	println!("Hello, world!");
214}
215```
216"#;
217
218    let result = processor.render(markdown);
219    // Should preserve hard tabs but issue warning
220    assert!(result.html.contains("\tprintln"));
221  }
222
223  #[test]
224  fn test_hardtab_handling_normalize() {
225    let options = MarkdownOptions {
226      tab_style: TabStyle::Normalize,
227      highlight_code: false,
228      ..Default::default()
229    };
230    let processor = MarkdownProcessor::new(options);
231
232    let markdown = r#"
233# Test Code
234
235```rust
236fn main() {
237	println!("Hello, world!");
238}
239```
240"#;
241
242    let result = processor.render(markdown);
243    // Should convert hard tabs to 2 spaces
244    assert!(!result.html.contains("\tprintln"));
245    assert!(result.html.contains("  println"));
246  }
247
248  #[test]
249  fn test_hardtab_handling_no_tabs() {
250    let options = MarkdownOptions {
251      tab_style: TabStyle::Warn,
252      highlight_code: false,
253      ..Default::default()
254    };
255    let processor = MarkdownProcessor::new(options);
256
257    let markdown = r#"
258# Test Code
259
260```rust
261fn main() {
262    println!("Hello, world!");
263}
264```
265"#;
266
267    let result = processor.render(markdown);
268    // Should work fine when no tabs are present
269    assert!(result.html.contains("    println"));
270    assert!(!result.html.contains("\t"));
271  }
272
273  #[test]
274  fn test_hardtab_handling_mixed_content() {
275    let options = MarkdownOptions {
276      tab_style: TabStyle::Normalize,
277      highlight_code: false,
278      ..Default::default()
279    };
280    let processor = MarkdownProcessor::new(options);
281
282    let markdown = r#"
283# Test Code
284
285```rust
286fn main() {
287	println!("Hello");  // tab here
288    println!("World");  // spaces here
289}
290```
291"#;
292
293    let result = processor.render(markdown);
294    // Should convert only tabs, preserve spaces
295    assert!(!result.html.contains("\tprintln"));
296    assert!(result.html.contains("  println"));
297    assert!(result.html.contains("    println"));
298  }
299}