Skip to main content

html_generator/
generator.rs

1// Copyright © 2025 HTML Generator. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! HTML generation module for converting Markdown to HTML.
5//!
6//! This module provides functions to generate HTML from Markdown content
7//! using the `mdx-gen` library. It supports various Markdown extensions
8//! and custom configuration options.
9
10#[cfg(not(target_arch = "wasm32"))]
11use crate::error::HtmlError;
12use crate::{
13    accessibility::add_aria_attributes,
14    extract_front_matter,
15    performance::minify_html_string,
16    seo::{escape_html, generate_structured_data_from_doc},
17    utils::generate_table_of_contents,
18    Result,
19};
20#[cfg(target_arch = "wasm32")]
21use comrak::Options;
22use log::warn;
23#[cfg(not(target_arch = "wasm32"))]
24use mdx_gen::{process_markdown, MarkdownOptions, Options};
25use once_cell::sync::Lazy;
26#[cfg(not(target_arch = "wasm32"))]
27use regex::Regex;
28#[cfg(not(target_arch = "wasm32"))]
29use std::borrow::Cow;
30use std::error::Error;
31use std::fmt;
32
33/// Pre-built comrak [`Options`] with every extension this crate uses
34/// enabled. Cloned per call and mutated for `render.r#unsafe`, which
35/// is the only extension-layer bit that varies at runtime. Cheap
36/// shallow clone; avoids reconstructing the full option tree on each
37/// `generate_html` invocation.
38static BASE_COMRAK_OPTIONS: Lazy<Options<'static>> = Lazy::new(|| {
39    let mut opts = Options::default();
40    opts.extension.strikethrough = true;
41    opts.extension.table = true;
42    opts.extension.autolink = true;
43    opts.extension.tasklist = true;
44    opts.extension.superscript = true;
45    opts
46});
47
/// Severity level for a processing diagnostic.
///
/// Variants are declared from least to most severe, so the derived
/// ordering satisfies `Info < Warning < Error` and callers can filter
/// with comparisons such as `level >= DiagnosticLevel::Warning`.
///
/// # Examples
///
/// ```
/// use html_generator::generator::DiagnosticLevel;
///
/// let level = DiagnosticLevel::Warning;
/// assert_eq!(format!("{level:?}"), "Warning");
/// assert!(DiagnosticLevel::Info < level);
/// assert!(level < DiagnosticLevel::Error);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DiagnosticLevel {
    /// Informational — a step succeeded with notable metrics.
    Info,
    /// A non-fatal issue — the pipeline continued with a fallback.
    Warning,
    /// A step failed entirely and was skipped.
    Error,
}
67
68/// A diagnostic emitted when a post-processing step fails non-fatally.
69///
70/// # Examples
71///
72/// ```
73/// use html_generator::generator::{Diagnostic, DiagnosticLevel};
74///
75/// let d = Diagnostic {
76///     step: "accessibility",
77///     level: DiagnosticLevel::Info,
78///     message: "ARIA attributes added".to_string(),
79/// };
80/// assert_eq!(d.step, "accessibility");
81/// assert!(d.to_string().contains("ARIA attributes added"));
82/// ```
83#[derive(Debug, Clone)]
84pub struct Diagnostic {
85    /// Which pipeline step produced this diagnostic.
86    pub step: &'static str,
87    /// Severity.
88    pub level: DiagnosticLevel,
89    /// Human-readable description of what went wrong.
90    pub message: String,
91}
92
93impl fmt::Display for Diagnostic {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        write!(f, "[{:?}] {}: {}", self.level, self.step, self.message)
96    }
97}
98
/// The result of [`generate_html_with_diagnostics`]: the final HTML plus
/// the diagnostics recorded while the pipeline ran.
///
/// # Examples
///
/// ```
/// use html_generator::{generator::generate_html_with_diagnostics, HtmlConfig};
///
/// let out = generate_html_with_diagnostics("# hello", &HtmlConfig::default()).unwrap();
/// assert!(out.html.contains("<h1>"));
/// // diagnostics records what each pipeline step did or skipped:
/// let _ = out.diagnostics.len();
/// ```
#[derive(Debug, Clone)]
pub struct HtmlOutput {
    /// The generated HTML content.
    pub html: String,
    /// Diagnostics from the pipeline steps, in the order they ran.
    /// Successful steps contribute `Info` entries and non-fatal failures
    /// contribute `Error` entries, so the list is only empty when no
    /// step recorded anything.
    pub diagnostics: Vec<Diagnostic>,
}
120
121/// Regex matching triple-colon custom class blocks. The static is
122/// only consulted by the native `markdown_to_html_impl` path; on
123/// `wasm32` we delegate directly to comrak and the helpers below
124/// are dead code.
125#[cfg(not(target_arch = "wasm32"))]
126static CUSTOM_CLASS_REGEX: Lazy<Regex> = Lazy::new(|| {
127    Regex::new(r":::(\w+)\n([\s\S]*?)\n:::")
128        .expect("static CUSTOM_CLASS_REGEX must compile")
129});
130
131/// Regex matching image-with-class syntax: `![alt](url).class="cls"`.
132/// Native-only; see `CUSTOM_CLASS_REGEX` above.
133#[cfg(not(target_arch = "wasm32"))]
134static IMAGE_CLASS_REGEX: Lazy<Regex> = Lazy::new(|| {
135    Regex::new(r#"!\[(.*?)\]\((.*?)\)\.class="(.*?)""#)
136        .expect("static IMAGE_CLASS_REGEX must compile")
137});
138
139/// Generate HTML from Markdown content using `mdx-gen`.
140///
141/// This function takes Markdown content and a configuration object,
142/// converts the Markdown into HTML, and applies the full processing
143/// pipeline based on configuration:
144///
145/// 1. Markdown → HTML conversion (with extensions)
146/// 2. Accessibility: adds ARIA attributes if enabled
147/// 3. Table of contents: injects TOC at `[[TOC]]` placeholder
148/// 4. Structured data: appends JSON-LD script tag
149/// 5. Minification: compresses output if enabled
150///
151/// Non-fatal failures in steps 2–5 are silently skipped. Use
152/// [`generate_html_with_diagnostics`] to inspect which steps failed.
153///
154/// # Examples
155///
156/// ```
157/// use html_generator::{generator::generate_html, HtmlConfig};
158///
159/// let html = generate_html("# Hello", &HtmlConfig::default()).unwrap();
160/// assert!(html.contains("<h1>Hello</h1>"));
161/// ```
162///
163/// # Errors
164///
165/// Returns [`crate::error::HtmlError`] if the core Markdown→HTML
166/// conversion fails (input invalid, exceeds buffer limits, etc.).
167pub fn generate_html(
168    markdown: &str,
169    config: &crate::HtmlConfig,
170) -> Result<String> {
171    generate_html_with_diagnostics(markdown, config).map(|o| o.html)
172}
173
174/// Like [`generate_html`], but returns an [`HtmlOutput`] that includes
175/// diagnostics for any post-processing steps that failed non-fatally.
176///
177/// # Examples
178///
179/// ```
180/// use html_generator::{
181///     generator::{generate_html_with_diagnostics, DiagnosticLevel},
182///     HtmlConfig,
183/// };
184///
185/// let out =
186///     generate_html_with_diagnostics("# Hello", &HtmlConfig::default()).unwrap();
187/// assert!(out.html.contains("<h1>"));
188/// // No fatal errors; any diagnostics are informational or warnings.
189/// assert!(
190///     out.diagnostics
191///         .iter()
192///         .all(|d| d.level != DiagnosticLevel::Error)
193/// );
194/// ```
195///
196/// # Errors
197///
198/// Returns [`crate::error::HtmlError`] if the core Markdown→HTML
199/// conversion fails. Non-fatal post-processing failures are recorded
200/// as `Error`-level diagnostics rather than propagated.
201pub fn generate_html_with_diagnostics(
202    markdown: &str,
203    config: &crate::HtmlConfig,
204) -> Result<HtmlOutput> {
205    let mut diagnostics: Vec<Diagnostic> = Vec::new();
206
207    // Step 1: Core Markdown → HTML (fatal on failure)
208    let mut html = markdown_to_html_impl(markdown, config)?;
209
210    // Step 2: HTML sanitization via ammonia (when raw HTML is allowed)
211    if config.allow_unsafe_html && config.sanitize_html {
212        html = ammonia::clean(&html);
213        diagnostics.push(Diagnostic {
214            step: "sanitization",
215            level: DiagnosticLevel::Info,
216            message: "HTML sanitized via ammonia".to_string(),
217        });
218    }
219
220    // Step 3: Accessibility — add ARIA attributes
221    if config.add_aria_attributes {
222        match add_aria_attributes(&html, None) {
223            Ok(enhanced) => {
224                html = enhanced;
225                diagnostics.push(Diagnostic {
226                    step: "accessibility",
227                    level: DiagnosticLevel::Info,
228                    message: "ARIA attributes added".to_string(),
229                });
230            }
231            Err(e) => {
232                let d = Diagnostic {
233                    step: "accessibility",
234                    level: DiagnosticLevel::Error,
235                    message: format!("ARIA enhancement skipped: {e}"),
236                };
237                warn!("{d}");
238                diagnostics.push(d);
239            }
240        }
241    }
242
243    // Step 4: Table of contents — replace [[TOC]] placeholder
244    if config.generate_toc {
245        match generate_table_of_contents(&html) {
246            Ok(toc) => {
247                html = html.replace("[[TOC]]", &toc);
248                diagnostics.push(Diagnostic {
249                    step: "toc",
250                    level: DiagnosticLevel::Info,
251                    message: "Table of contents injected".to_string(),
252                });
253            }
254            Err(e) => {
255                let d = Diagnostic {
256                    step: "toc",
257                    level: DiagnosticLevel::Error,
258                    message: format!(
259                        "Table of contents generation failed: {e}"
260                    ),
261                };
262                warn!("{d}");
263                diagnostics.push(d);
264            }
265        }
266    }
267
268    // Step 4b: Math — convert $..$ / $$..$$ to inline MathML.
269    // Infallible: pulldown-latex encodes parse errors inline as
270    // `<merror>` elements rather than returning Err, so the
271    // pipeline never has to skip this step.
272    #[cfg(feature = "math")]
273    if config.enable_math {
274        let before_len = html.len();
275        html = crate::math::convert_math(&html);
276        if html.len() != before_len {
277            diagnostics.push(Diagnostic {
278                step: "math",
279                level: DiagnosticLevel::Info,
280                message: "LaTeX math rendered to MathML".to_string(),
281            });
282        }
283    }
284
285    // Step 4c: Diagrams — rewrite mermaid fenced blocks for client-side mermaid.js
286    if config.enable_diagrams {
287        let before_len = html.len();
288        html = crate::math::rewrite_mermaid_blocks(&html);
289        if html.len() != before_len {
290            diagnostics.push(Diagnostic {
291                step: "diagrams",
292                level: DiagnosticLevel::Info,
293                message:
294                    "Mermaid blocks rewritten for client-side rendering"
295                        .to_string(),
296            });
297        }
298    }
299
300    // Step 5: Parse DOM once for read-only steps (SEO, heading extraction)
301    let document = scraper::Html::parse_document(&html);
302
303    // Step 5a: Structured data — generate JSON-LD
304    let mut json_ld_fragment = String::new();
305    if config.generate_structured_data {
306        match generate_structured_data_from_doc(&document, None) {
307            Ok(json_ld) => {
308                json_ld_fragment = json_ld;
309                diagnostics.push(Diagnostic {
310                    step: "structured_data",
311                    level: DiagnosticLevel::Info,
312                    message: "JSON-LD structured data generated"
313                        .to_string(),
314                });
315            }
316            Err(e) => {
317                let d = Diagnostic {
318                    step: "structured_data",
319                    level: DiagnosticLevel::Error,
320                    message: format!(
321                        "Structured data generation failed: {e}"
322                    ),
323                };
324                warn!("{d}");
325                diagnostics.push(d);
326            }
327        }
328    }
329
330    // Step 6: Full document wrapping or fragment language injection
331    if config.generate_full_document {
332        // Extract title from already-parsed DOM (no extra parse)
333        let title = extract_first_heading_from_doc(&document);
334        html = wrap_full_document(
335            &html,
336            &json_ld_fragment,
337            title.as_deref(),
338            config,
339        );
340    } else {
341        // Fragment mode: append JSON-LD at the end (legacy behaviour)
342        if !json_ld_fragment.is_empty() {
343            html.push_str(&json_ld_fragment);
344        }
345        // Wrap in a lang div when the user set a non-default language
346        if config.language != crate::constants::DEFAULT_LANGUAGE {
347            html = format!(
348                "<div lang=\"{}\">{}</div>",
349                escape_html(&config.language),
350                html
351            );
352        }
353    }
354
355    // Step 7: Minification
356    if config.minify_output {
357        let before_len = html.len();
358        match minify_html_string(&html) {
359            Ok(minified) => {
360                let saved = before_len.saturating_sub(minified.len());
361                html = minified;
362                diagnostics.push(Diagnostic {
363                    step: "minification",
364                    level: DiagnosticLevel::Info,
365                    message: format!(
366                        "Minified: saved {} bytes ({:.0}%)",
367                        saved,
368                        if before_len > 0 {
369                            saved as f64 / before_len as f64 * 100.0
370                        } else {
371                            0.0
372                        }
373                    ),
374                });
375            }
376            Err(e) => {
377                let d = Diagnostic {
378                    step: "minification",
379                    level: DiagnosticLevel::Error,
380                    message: format!("Minification failed: {e}"),
381                };
382                warn!("{d}");
383                diagnostics.push(d);
384            }
385        }
386    }
387
388    Ok(HtmlOutput { html, diagnostics })
389}
390
391/// Wraps HTML body content in a valid HTML5 document skeleton.
392fn wrap_full_document(
393    body: &str,
394    json_ld: &str,
395    title: Option<&str>,
396    config: &crate::HtmlConfig,
397) -> String {
398    let lang = escape_html(&config.language);
399    let mut head = String::from("<meta charset=\"utf-8\">");
400
401    if let Some(t) = title {
402        head.push_str(&format!("<title>{}</title>", escape_html(t)));
403    }
404
405    if !json_ld.is_empty() {
406        head.push_str(json_ld);
407    }
408
409    format!(
410        "<!DOCTYPE html>\n<html lang=\"{lang}\">\n<head>{head}</head>\n<body>\n{body}\n</body>\n</html>"
411    )
412}
413
414/// Selector for the first heading; the source content is a compile-time
415/// constant, so parsing is infallible at runtime.
416static H1_SELECTOR: Lazy<scraper::Selector> = Lazy::new(|| {
417    scraper::Selector::parse("h1")
418        .expect("static H1_SELECTOR must parse")
419});
420
421/// Extracts text content from the first `<h1>` in a pre-parsed DOM.
422fn extract_first_heading_from_doc(
423    document: &scraper::Html,
424) -> Option<String> {
425    document
426        .select(&H1_SELECTOR)
427        .next()
428        .map(|el| el.text().collect::<String>())
429}
430
431/// Convert Markdown to HTML with specified extensions using `mdx-gen`.
432///
433/// Uses [`crate::HtmlConfig::default`] under the hood; for full control
434/// over the pipeline use [`generate_html`] directly.
435///
436/// # Examples
437///
438/// ```
439/// use html_generator::generator::markdown_to_html_with_extensions;
440///
441/// let html = markdown_to_html_with_extensions("**bold**").unwrap();
442/// assert!(html.contains("<strong>bold</strong>"));
443/// ```
444///
445/// # Errors
446///
447/// Returns [`crate::error::HtmlError::MarkdownConversion`] if the
448/// underlying `comrak`/`mdx-gen` parse fails.
449pub fn markdown_to_html_with_extensions(
450    markdown: &str,
451) -> Result<String> {
452    markdown_to_html_impl(markdown, &crate::HtmlConfig::default())
453}
454
455#[cfg(not(target_arch = "wasm32"))]
456fn markdown_to_html_impl(
457    markdown: &str,
458    config: &crate::HtmlConfig,
459) -> Result<String> {
460    // 1) Extract front matter
461    let content_without_front_matter = extract_front_matter(markdown)
462        .unwrap_or_else(|_| markdown.to_string());
463
464    // 2) Convert triple-colon blocks (no-alloc when no `:::` match).
465    let markdown_with_classes = add_custom_classes(
466        &content_without_front_matter,
467        config.allow_unsafe_html,
468    );
469
470    // 3) Convert images with `.class="..."` (no-alloc when no match).
471    let markdown_with_images =
472        process_images_with_classes(&markdown_with_classes);
473
474    // 4) Clone the cached Options tree and set the two runtime-varying
475    //    bits (unsafe HTML + syntax highlighting/theme).
476    let mut comrak_options = BASE_COMRAK_OPTIONS.clone();
477    comrak_options.render.r#unsafe = config.allow_unsafe_html;
478
479    let mut md_options = MarkdownOptions::default()
480        .with_comrak_options(comrak_options)
481        .with_syntax_highlighting(config.enable_syntax_highlighting);
482
483    if let Some(ref theme) = config.syntax_theme {
484        md_options = md_options.with_custom_theme(theme.clone());
485    }
486
487    // 5) Convert final Markdown to HTML
488    process_markdown(&markdown_with_images, &md_options).map_err(
489        |err| HtmlError::markdown_conversion(err.to_string(), None),
490    )
491}
492
/// WASM-target Markdown → HTML.
///
/// Calls `comrak` directly instead of going through `mdx-gen` (which
/// pulls in `tokio` unconditionally and therefore does not compile to
/// `wasm32-unknown-unknown`), using the same extension flags as the
/// native path. Custom classes (`:::warning`), image-class syntax, and
/// `syntect` syntax highlighting are not available in this build path;
/// everything else (CommonMark + GFM tables, strikethrough, autolinks,
/// tasklists, superscript) renders identically to the native pipeline.
#[cfg(target_arch = "wasm32")]
fn markdown_to_html_impl(
    markdown: &str,
    config: &crate::HtmlConfig,
) -> Result<String> {
    // Front matter is optional: fall back to the raw input on error.
    let body = extract_front_matter(markdown)
        .unwrap_or_else(|_| markdown.to_string());

    let mut comrak_options = BASE_COMRAK_OPTIONS.clone();
    comrak_options.render.r#unsafe = config.allow_unsafe_html;

    let rendered = comrak::markdown_to_html(&body, &comrak_options);
    Ok(rendered)
}
519
520/// Re-parse inline Markdown for triple-colon blocks, e.g.:
521///
522/// ```markdown
523/// :::warning
524/// **Caution:** This is risky.
525/// :::
526/// ```
527///
528/// Produces something like:
529/// ```html
530/// <div class="warning"><strong>Caution:</strong> This is risky.</div>
531/// ```
532///
533/// # Example
534/// ...
535#[cfg(not(target_arch = "wasm32"))]
536fn add_custom_classes(
537    markdown: &str,
538    allow_unsafe_html: bool,
539) -> Cow<'_, str> {
540    // `regex::Regex::replace_all` returns `Cow::Borrowed(markdown)`
541    // when there are zero matches — avoiding the allocation
542    // entirely for the common case of a document without `:::` blocks.
543    CUSTOM_CLASS_REGEX.replace_all(
544        markdown,
545        |caps: &regex::Captures| {
546            let class_name = &caps[1];
547            let block_content = &caps[2];
548
549            let inline_html = match process_markdown_inline_impl(
550                block_content,
551                allow_unsafe_html,
552            ) {
553                Ok(html) => html,
554                Err(_) => block_content.to_string(),
555            };
556
557            // class_name is validated by the \w+ regex — safe to interpolate
558            format!(
559                "<div class=\"{}\">{}</div>",
560                class_name, inline_html
561            )
562        },
563    )
564}
565
566/// Processes inline Markdown (bold, italics, links, etc.) without block-level syntax.
567///
568/// # Examples
569///
570/// ```
571/// use html_generator::generator::process_markdown_inline;
572///
573/// let html = process_markdown_inline("**bold** and *italic*").unwrap();
574/// assert!(html.contains("<strong>bold</strong>"));
575/// assert!(html.contains("<em>italic</em>"));
576/// ```
577///
578/// # Errors
579///
580/// Returns the underlying `mdx-gen` error if Markdown parsing fails.
581pub fn process_markdown_inline(
582    content: &str,
583) -> std::result::Result<String, Box<dyn Error>> {
584    process_markdown_inline_impl(content, false)
585}
586
587#[cfg(not(target_arch = "wasm32"))]
588fn process_markdown_inline_impl(
589    content: &str,
590    allow_unsafe_html: bool,
591) -> std::result::Result<String, Box<dyn Error>> {
592    // Inline rendering shares the same extension tree as the outer
593    // pipeline; clone from the cached base rather than rebuilding.
594    let mut comrak_opts = BASE_COMRAK_OPTIONS.clone();
595    comrak_opts.render.r#unsafe = allow_unsafe_html;
596
597    let options =
598        MarkdownOptions::default().with_comrak_options(comrak_opts);
599    Ok(process_markdown(content, &options)?)
600}
601
/// WASM-target snippet renderer: calls `comrak` directly with the shared
/// extension set because `mdx-gen` is not used on `wasm32`. This variant
/// always returns `Ok`.
#[cfg(target_arch = "wasm32")]
fn process_markdown_inline_impl(
    content: &str,
    allow_unsafe_html: bool,
) -> std::result::Result<String, Box<dyn Error>> {
    let mut comrak_options = BASE_COMRAK_OPTIONS.clone();
    comrak_options.render.r#unsafe = allow_unsafe_html;

    let rendered = comrak::markdown_to_html(content, &comrak_options);
    Ok(rendered)
}
613
614/// Replaces image patterns like
615/// `![Alt text](URL).class="some-class"` with `<img src="URL" alt="Alt text" class="some-class" />`.
616#[cfg(not(target_arch = "wasm32"))]
617fn process_images_with_classes(markdown: &str) -> Cow<'_, str> {
618    // Borrowed-Cow when the document has no `![alt](url).class="x"`
619    // construct — i.e. every typical document.
620    IMAGE_CLASS_REGEX.replace_all(markdown, |caps: &regex::Captures| {
621        format!(
622            r#"<img src="{}" alt="{}" class="{}" />"#,
623            escape_html(&caps[2]), // URL
624            escape_html(&caps[1]), // alt text
625            escape_html(&caps[3]), // class attribute
626        )
627    })
628}
629
630#[cfg(test)]
631mod tests {
632    use super::*;
633    use crate::HtmlConfig;
634
635    /// Test basic Markdown to HTML conversion.
636    ///
637    /// This test verifies that a simple Markdown input is correctly converted to HTML.
638    #[test]
639    fn test_generate_html_basic() {
640        let markdown = "# Hello, world!\n\nThis is a test.";
641        let config = HtmlConfig::default();
642        let result = generate_html(markdown, &config);
643        assert!(result.is_ok());
644        let html = result.unwrap();
645        assert!(html.contains("<h1>Hello, world!</h1>"));
646        assert!(html.contains("<p>This is a test.</p>"));
647    }
648
649    /// Test conversion with Markdown extensions.
650    ///
651    /// This test ensures that the Markdown extensions (e.g., custom blocks, enhanced tables, etc.)
652    /// are correctly applied when converting Markdown to HTML.
653    #[test]
654    fn test_markdown_to_html_with_extensions() {
655        let markdown = r"
656| Header 1 | Header 2 |
657| -------- | -------- |
658| Row 1    | Row 2    |
659";
660        let result = markdown_to_html_with_extensions(markdown);
661        assert!(result.is_ok());
662        let html = result.unwrap();
663
664        println!("{}", html);
665
666        // Update the test to look for the div wrapper and table classes
667        assert!(html.contains("<div class=\"table-responsive\"><table class=\"table\">"), "Table element not found");
668        assert!(
669            html.contains("<th>Header 1</th>"),
670            "Table header not found"
671        );
672        assert!(
673            html.contains("<td class=\"text-left\">Row 1</td>"),
674            "Table row not found"
675        );
676    }
677
678    /// Test conversion of empty Markdown.
679    ///
680    /// This test checks that an empty Markdown input results in an empty HTML string.
681    #[test]
682    fn test_generate_html_empty() {
683        let markdown = "";
684        let config = HtmlConfig::default();
685        let result = generate_html(markdown, &config);
686        assert!(result.is_ok());
687        let html = result.unwrap();
688        assert!(html.is_empty());
689    }
690
691    /// Test handling of invalid Markdown.
692    ///
693    /// This test verifies that even with poorly formatted Markdown, the function
694    /// will not panic and will return valid HTML.
695    #[test]
696    fn test_generate_html_invalid_markdown() {
697        let markdown = "# Unclosed header\nSome **unclosed bold";
698        let config = HtmlConfig::default();
699        let result = generate_html(markdown, &config);
700        assert!(result.is_ok());
701        let html = result.unwrap();
702
703        println!("{}", html);
704
705        assert!(
706            html.contains("<h1>Unclosed header</h1>"),
707            "Header not found"
708        );
709        assert!(
710            html.contains("<p>Some **unclosed bold</p>"),
711            "Unclosed bold tag not properly handled"
712        );
713    }
714
715    /// Test conversion with complex Markdown content.
716    ///
717    /// This test checks how the function handles more complex Markdown input with various
718    /// elements like lists, headers, code blocks, and links.
719    /// Test conversion with complex Markdown content.
720    #[test]
721    fn test_generate_html_complex() {
722        let markdown = r#"
723# Header
724
725## Subheader
726
727Some `inline code` and a [link](https://example.com).
728
729```rust
730fn main() {
731    println!("Hello, world!");
732}
733```
734
7351. First item
7362. Second item
737"#;
738        let config = HtmlConfig::default();
739        let result = generate_html(markdown, &config);
740        assert!(result.is_ok());
741        let html = result.unwrap();
742        println!("{}", html);
743
744        // Verify the header and subheader
745        assert!(
746            html.contains("<h1>Header</h1>"),
747            "H1 Header not found"
748        );
749        assert!(
750            html.contains("<h2>Subheader</h2>"),
751            "H2 Header not found"
752        );
753
754        // Verify the inline code and link
755        assert!(
756            html.contains("<code>inline code</code>"),
757            "Inline code not found"
758        );
759        assert!(
760            html.contains(r#"<a href="https://example.com">link</a>"#),
761            "Link not found"
762        );
763
764        // Verify the code block structure
765        assert!(
766            html.contains(r#"<code class="language-rust">"#),
767            "Code block with language-rust class not found"
768        );
769        assert!(
770            html.contains(r#"<span style="color:#b48ead;">fn </span>"#),
771            "`fn` keyword with syntax highlighting not found"
772        );
773        assert!(
774            html.contains(
775                r#"<span style="color:#8fa1b3;">main</span>"#
776            ),
777            "`main` function name with syntax highlighting not found"
778        );
779
780        // Check for the ordered list items
781        assert!(
782            html.contains("<li>First item</li>"),
783            "First item not found"
784        );
785        assert!(
786            html.contains("<li>Second item</li>"),
787            "Second item not found"
788        );
789    }
790
791    /// Test handling of valid front matter.
792    #[test]
793    fn test_generate_html_with_valid_front_matter() {
794        let markdown = r#"---
795title: Test
796author: Jane Doe
797---
798# Hello, world!"#;
799        let config = HtmlConfig::default();
800        let result = generate_html(markdown, &config);
801        assert!(result.is_ok());
802        let html = result.unwrap();
803        assert!(html.contains("<h1>Hello, world!</h1>"));
804    }
805
806    /// Test handling of invalid front matter.
807    #[test]
808    fn test_generate_html_with_invalid_front_matter() {
809        let markdown = r#"---
810title Test
811author: Jane Doe
812---
813# Hello, world!"#;
814        let config = HtmlConfig::default();
815        let result = generate_html(markdown, &config);
816        assert!(
817            result.is_ok(),
818            "Invalid front matter should be ignored"
819        );
820        let html = result.unwrap();
821        assert!(html.contains("<h1>Hello, world!</h1>"));
822    }
823
824    /// Test with a large Markdown input.
825    #[test]
826    fn test_generate_html_large_input() {
827        let markdown = "# Large Markdown\n\n".repeat(10_000);
828        let config = HtmlConfig::default();
829        let result = generate_html(&markdown, &config);
830        assert!(result.is_ok());
831        let html = result.unwrap();
832        assert!(html.contains("<h1>Large Markdown</h1>"));
833    }
834
835    /// Test with different MarkdownOptions configurations.
836    #[test]
837    fn test_generate_html_with_custom_markdown_options() {
838        let markdown = "**Bold text**";
839        let config = HtmlConfig::default();
840        let result = generate_html(markdown, &config);
841        assert!(result.is_ok());
842        let html = result.unwrap();
843        assert!(html.contains("<strong>Bold text</strong>"));
844    }
845
846    /// Test unsupported Markdown elements.
847    #[test]
848    fn test_generate_html_with_unsupported_elements() {
849        let markdown = "::: custom_block\nContent\n:::";
850        let config = HtmlConfig::default();
851        let result = generate_html(markdown, &config);
852        assert!(result.is_ok());
853        let html = result.unwrap();
854        assert!(html.contains("::: custom_block"));
855    }
856
857    /// Test error handling for invalid Markdown conversion.
858    #[test]
859    fn test_markdown_to_html_with_conversion_error() {
860        let markdown = "# Unclosed header\nSome **unclosed bold";
861        let result = markdown_to_html_with_extensions(markdown);
862        assert!(result.is_ok());
863        let html = result.unwrap();
864        assert!(html.contains("<p>Some **unclosed bold</p>"));
865    }
866
867    /// Test handling of whitespace-only Markdown.
868    #[test]
869    fn test_generate_html_whitespace_only() {
870        let markdown = "   \n   ";
871        let config = HtmlConfig::default();
872        let result = generate_html(markdown, &config);
873        assert!(result.is_ok());
874        let html = result.unwrap();
875        assert!(
876            html.is_empty(),
877            "Whitespace-only Markdown should produce empty HTML"
878        );
879    }
880
881    /// Test customization of Options.
882    ///
883    /// Native-only: drives `mdx_gen::{MarkdownOptions, process_markdown}`
884    /// which are not available on the wasm32 build path.
885    #[cfg(not(target_arch = "wasm32"))]
886    #[test]
887    fn test_markdown_to_html_with_custom_comrak_options() {
888        let markdown = "^^Superscript^^\n\n| Header 1 | Header 2 |\n| -------- | -------- |\n| Row 1    | Row 2    |";
889
890        // Configure Options with necessary extensions
891        let mut comrak_options = Options::default();
892        comrak_options.extension.superscript = true;
893        comrak_options.extension.table = true; // Enable table to match MarkdownOptions
894
895        // Synchronize MarkdownOptions with Options
896        let options = MarkdownOptions::default()
897            .with_comrak_options(comrak_options.clone());
898        let content_without_front_matter =
899            extract_front_matter(markdown)
900                .unwrap_or(markdown.to_string());
901
902        println!("Comrak options: {:?}", comrak_options);
903
904        let result =
905            process_markdown(&content_without_front_matter, &options);
906
907        match result {
908            Ok(ref html) => {
909                // Assert superscript rendering
910                assert!(
911                    html.contains("<sup>Superscript</sup>"),
912                    "Superscript not found in HTML output"
913                );
914
915                // Assert table rendering
916                assert!(
917                    html.contains("<table"),
918                    "Table element not found in HTML output"
919                );
920            }
921            Err(err) => {
922                panic!(
923                    "Failed to process Markdown with custom Options: {:?}",
924                    err
925                );
926            }
927        }
928    }
929    #[test]
930    fn test_generate_html_with_default_config() {
931        let markdown = "# Default Configuration Test";
932        let config = HtmlConfig::default();
933        let result = generate_html(markdown, &config);
934        assert!(result.is_ok());
935        let html = result.unwrap();
936        assert!(html.contains("<h1>Default Configuration Test</h1>"));
937    }
938
939    #[test]
940    fn test_generate_html_with_custom_front_matter_delimiter() {
941        let markdown = r#";;;;
942title: Custom
943author: John Doe
944;;;;
945# Custom Front Matter Delimiter"#;
946
947        let config = HtmlConfig::default();
948        let result = generate_html(markdown, &config);
949        assert!(result.is_ok());
950        let html = result.unwrap();
951        assert!(html.contains("<h1>Custom Front Matter Delimiter</h1>"));
952    }
953    #[test]
954    fn test_generate_html_with_task_list() {
955        let markdown = r"
956- [x] Task 1
957- [ ] Task 2
958";
959
960        let result = markdown_to_html_with_extensions(markdown);
961        assert!(result.is_ok());
962        let html = result.unwrap();
963
964        println!("Generated HTML:\n{}", html);
965
966        // Adjust assertions to match the rendered HTML structure
967        assert!(
968        html.contains(r#"<li><input type="checkbox" checked="" disabled="" /> Task 1</li>"#),
969        "Task 1 checkbox not rendered as expected"
970    );
971        assert!(
972        html.contains(r#"<li><input type="checkbox" disabled="" /> Task 2</li>"#),
973        "Task 2 checkbox not rendered as expected"
974    );
975    }
976    #[test]
977    fn test_generate_html_with_large_table() {
978        let header =
979            "| Header 1 | Header 2 |\n| -------- | -------- |\n";
980        let rows = "| Row 1    | Row 2    |\n".repeat(1000);
981        let markdown = format!("{}{}", header, rows);
982
983        let result = markdown_to_html_with_extensions(&markdown);
984        assert!(result.is_ok());
985        let html = result.unwrap();
986
987        let row_count = html.matches("<tr>").count();
988        assert_eq!(
989            row_count, 1001,
990            "Incorrect number of rows: {}",
991            row_count
992        ); // 1 header + 1000 rows
993    }
994    #[test]
995    fn test_generate_html_with_special_characters() {
996        let markdown = r#"Markdown with special characters: <, >, &, "quote", 'single-quote'."#;
997        let result = markdown_to_html_with_extensions(markdown);
998        assert!(result.is_ok());
999        let html = result.unwrap();
1000
1001        assert!(html.contains("&lt;"), "Less than sign not escaped");
1002        assert!(html.contains("&gt;"), "Greater than sign not escaped");
1003        assert!(html.contains("&amp;"), "Ampersand not escaped");
1004        assert!(html.contains("&quot;"), "Double quote not escaped");
1005
1006        // Adjust if single quotes are intended to remain unescaped
1007        assert!(
1008            html.contains("&#39;") || html.contains("'"),
1009            "Single quote not handled as expected"
1010        );
1011    }
1012
1013    #[test]
1014    fn test_generate_html_with_invalid_markdown_syntax() {
1015        // With unsafe_html disabled (default), raw HTML tags are stripped
1016        let markdown =
1017            r"# Invalid Markdown <unexpected> [bad](url <here)";
1018        let result = markdown_to_html_with_extensions(markdown);
1019        assert!(result.is_ok());
1020        let html = result.unwrap();
1021
1022        println!("Generated HTML:\n{}", html);
1023
1024        // Raw HTML tags are stripped when unsafe=false
1025        assert!(html.contains("<h1>"), "Header tag should be present");
1026    }
1027
1028    /// Test handling of Markdown with a mix of valid and invalid syntax.
1029    #[test]
1030    fn test_generate_html_mixed_markdown() {
1031        let markdown = r"# Valid Header
1032Some **bold text** followed by invalid Markdown:
1033~~strikethrough~~ without a closing tag.";
1034        let result = markdown_to_html_with_extensions(markdown);
1035        assert!(result.is_ok());
1036        let html = result.unwrap();
1037
1038        assert!(
1039            html.contains("<h1>Valid Header</h1>"),
1040            "Header not found"
1041        );
1042        assert!(
1043            html.contains("<strong>bold text</strong>"),
1044            "Bold text not rendered correctly"
1045        );
1046        assert!(
1047            html.contains("<del>strikethrough</del>"),
1048            "Strikethrough not rendered correctly"
1049        );
1050    }
1051
1052    /// Test handling of deeply nested Markdown content.
1053    #[test]
1054    fn test_generate_html_deeply_nested_content() {
1055        let markdown = r"
10561. Level 1
1057    1.1. Level 2
1058        1.1.1. Level 3
1059            1.1.1.1. Level 4
1060";
1061        let result = markdown_to_html_with_extensions(markdown);
1062        assert!(result.is_ok());
1063        let html = result.unwrap();
1064
1065        assert!(html.contains("<ol>"), "Ordered list not rendered");
1066        assert!(html.contains("<li>Level 1"), "Level 1 not rendered");
1067        assert!(
1068            html.contains("1.1.1.1. Level 4"),
1069            "Deeply nested levels not rendered correctly"
1070        );
1071    }
1072
1073    /// Test Markdown with embedded raw HTML content (opt-in unsafe).
1074    #[test]
1075    fn test_generate_html_with_raw_html() {
1076        let markdown = r"
1077# Header with HTML
1078<p>This is a paragraph with <strong>HTML</strong>.</p>
1079";
1080        // Opt in to unsafe HTML for this test
1081        let config = HtmlConfig {
1082            allow_unsafe_html: true,
1083            ..HtmlConfig::default()
1084        };
1085        let result = generate_html(markdown, &config);
1086        assert!(result.is_ok());
1087        let html = result.unwrap();
1088
1089        assert!(
1090            html.contains("<p>This is a paragraph with <strong>HTML</strong>.</p>"),
1091            "Raw HTML content not preserved in output"
1092        );
1093    }
1094
1095    /// Test Markdown with invalid front matter format.
1096    #[test]
1097    fn test_generate_html_invalid_front_matter_handling() {
1098        let markdown = "---
1099key_without_value
1100another_key: valid
1101---
1102# Markdown Content
1103";
1104        let result = generate_html(markdown, &HtmlConfig::default());
1105        assert!(
1106            result.is_ok(),
1107            "Invalid front matter should not cause an error"
1108        );
1109        let html = result.unwrap();
1110        assert!(
1111            html.contains("<h1>Markdown Content</h1>"),
1112            "Content not processed correctly"
1113        );
1114    }
1115
1116    /// Test handling of very large front matter in Markdown.
1117    #[test]
1118    fn test_generate_html_large_front_matter() {
1119        let front_matter = "---\n".to_owned()
1120            + &"key: value\n".repeat(10_000)
1121            + "---\n# Content";
1122        let result =
1123            generate_html(&front_matter, &HtmlConfig::default());
1124        assert!(
1125            result.is_ok(),
1126            "Large front matter should be handled gracefully"
1127        );
1128        let html = result.unwrap();
1129        assert!(
1130            html.contains("<h1>Content</h1>"),
1131            "Content not rendered correctly"
1132        );
1133    }
1134
1135    /// Test handling of Markdown with long consecutive lines.
1136    #[test]
1137    fn test_generate_html_with_long_lines() {
1138        let markdown = "A ".repeat(10_000);
1139        let result = markdown_to_html_with_extensions(&markdown);
1140        assert!(result.is_ok());
1141        let html = result.unwrap();
1142
1143        assert!(
1144            html.contains("A A A A"),
1145            "Long consecutive lines should be rendered properly"
1146        );
1147    }
1148
1149    #[test]
1150    fn test_markdown_with_custom_classes() {
1151        let markdown = r":::note
1152This is a note with a custom class.
1153:::";
1154
1155        let result = markdown_to_html_with_extensions(markdown);
1156        assert!(result.is_ok(), "Markdown conversion should not fail.");
1157
1158        let html = result.unwrap();
1159        println!("HTML:\n{}", html);
1160
1161        // Ensure we see <div class="note"> in the final output:
1162        assert!(
1163            html.contains(r#"<div class="note">"#),
1164            "Custom block should wrap in <div class=\"note\">"
1165        );
1166
1167        // Ensure the block content is present:
1168        assert!(
1169            html.contains("This is a note with a custom class."),
1170            "Block text is missing or incorrectly rendered"
1171        );
1172    }
1173
1174    #[test]
1175    fn test_markdown_with_custom_blocks_and_images() {
1176        let markdown = "![A very tall building](https://example.com/image.webp).class=\"img-fluid\"";
1177        let result = markdown_to_html_with_extensions(markdown);
1178        assert!(result.is_ok());
1179        let html = result.unwrap();
1180        println!("{}", html);
1181        assert!(
1182        html.contains(r#"<img src="https://example.com/image.webp" alt="A very tall building" class="img-fluid" />"#),
1183        "First image not rendered correctly"
1184    );
1185    }
1186
1187    /// Test empty front matter handling.
1188    #[test]
1189    fn test_empty_front_matter_handling() {
1190        let markdown = "---\n---\n# Content";
1191        let result = generate_html(markdown, &HtmlConfig::default());
1192        assert!(result.is_ok());
1193        let html = result.unwrap();
1194        assert!(
1195            html.contains("<h1>Content</h1>"),
1196            "Content should be processed correctly"
1197        );
1198    }
1199
1200    /// Test invalid image syntax.
1201    ///
1202    /// Native-only: `process_images_with_classes` lives in the
1203    /// `#[cfg(not(target_arch = "wasm32"))]` half of this module
1204    /// because the WASM build path bypasses `mdx-gen`'s extension
1205    /// helpers entirely.
1206    #[cfg(not(target_arch = "wasm32"))]
1207    #[test]
1208    fn test_invalid_image_syntax() {
1209        let markdown = "![Image with missing URL]()";
1210        let result = process_images_with_classes(markdown);
1211        assert_eq!(
1212            result, markdown,
1213            "Invalid image syntax should remain unchanged"
1214        );
1215    }
1216
1217    /// Test incorrect front matter delimiters.
1218    #[test]
1219    fn test_incorrect_front_matter_delimiters() {
1220        let markdown = ";;;\ntitle: Test\n---\n# Header";
1221        let result = generate_html(markdown, &HtmlConfig::default());
1222        assert!(result.is_ok());
1223        let html = result.unwrap();
1224        assert!(
1225            html.contains("<h1>Header</h1>"),
1226            "Header should be processed correctly"
1227        );
1228    }
1229    #[cfg(test)]
1230    mod missing_scenarios_tests {
1231        use super::*;
1232
1233        /// 1) Triple-colon block with inline bold text
1234        ///
1235        /// Verifies that **Caution:** inside `:::warning` is parsed as `<strong>Caution:</strong>`.
1236        #[test]
1237        fn test_triple_colon_warning_with_bold() {
1238            let markdown = r":::warning
1239**Caution:** This operation is sensitive.
1240:::";
1241
1242            let result = markdown_to_html_with_extensions(markdown);
1243            assert!(
1244                result.is_ok(),
1245                "Markdown conversion should succeed."
1246            );
1247
1248            let html = result.unwrap();
1249            println!("HTML:\n{}", html);
1250
1251            // Expect the block to contain <strong>Caution:</strong>
1252            // plus a <div class="warning">
1253            assert!(
1254                html.contains(r#"<div class="warning">"#),
1255                "Expected <div class=\"warning\"> wrapping the block"
1256            );
1257            assert!(html.contains("<strong>Caution:</strong>"),
1258            "Expected inline bold text to become <strong>Caution:</strong>");
1259        }
1260
1261        /// 2) Multiple triple-colon blocks in the same snippet.
1262        ///
1263        /// Ensures that the parser correctly handles more than one custom block.
1264        #[test]
1265        fn test_multiple_triple_colon_blocks() {
1266            let markdown = r":::note
1267**Note:** First block
1268:::
1269
1270:::warning
1271**Warning:** Second block
1272:::";
1273
1274            let result = markdown_to_html_with_extensions(markdown);
1275            assert!(
1276                result.is_ok(),
1277                "Markdown conversion should succeed."
1278            );
1279
1280            let html = result.unwrap();
1281            println!("HTML:\n{}", html);
1282
1283            // Expect <div class="note"> ...</div> and <div class="warning"> ...</div>
1284            assert!(
1285                html.contains(r#"<div class="note">"#),
1286                "Missing <div class=\"note\"> for the first block"
1287            );
1288            assert!(
1289                html.contains(r#"<div class="warning">"#),
1290                "Missing <div class=\"warning\"> for the second block"
1291            );
1292
1293            // Check inline markdown
1294            assert!(
1295                html.contains("<strong>Note:</strong>"),
1296                "Bold text in the note block not parsed"
1297            );
1298            assert!(
1299                html.contains("<strong>Warning:</strong>"),
1300                "Bold text in the warning block not parsed"
1301            );
1302        }
1303
1304        /// 3) Triple-colon block with multi-paragraph content
1305        ///
1306        /// Checks how inline parsing deals with extra blank lines and multiple paragraphs.
1307        #[test]
1308        fn test_triple_colon_block_multi_paragraph() {
1309            let markdown = r":::note
1310**Paragraph 1:** This is the first paragraph.
1311
1312This is the second paragraph, also with **bold** text.
1313:::";
1314
1315            let result = markdown_to_html_with_extensions(markdown);
1316            assert!(
1317                result.is_ok(),
1318                "Markdown conversion should succeed."
1319            );
1320
1321            let html = result.unwrap();
1322            println!("HTML:\n{}", html);
1323
1324            // The block is inline-processed. Paragraphs might be combined or
1325            // each appear in separate <p> tags, depending on the parser.
1326            // Typically, inline parsing doesn't break paragraphs. If you want block-level
1327            // formatting, you'd need a full block parse. But let's at least confirm bold text.
1328            assert!(
1329                html.contains("<strong>Paragraph 1:</strong>"),
1330                "Inline bold text not parsed in the first paragraph"
1331            );
1332            assert!(html.contains("second paragraph, also with <strong>bold</strong> text"),
1333            "Inline bold text not parsed in the second paragraph");
1334        }
1335
1336        /// 4) Fallback logic: forcing an error in `process_markdown_inline`
1337        ///
1338        /// We'll create a scenario that intentionally breaks the inline parser.
1339        /// If an error occurs, we expect the raw text (with triple-colon block content).
1340        #[test]
1341        fn test_triple_colon_block_forcing_inline_error() {
1342            // Suppose the inline parser fails when we pass some nonsense markup or unhandled structure.
1343            // It's not always guaranteed to fail, but let's try an improbable snippet:
1344            let markdown = r":::error
1345This block tries < to break > inline parsing & [some link (unclosed).
1346:::";
1347
1348            // We'll artificially modify the parser to fail if it sees "[some link (unclosed)."
1349            // But since your code doesn't do that by default, we can't *guarantee* a real error.
1350            // We'll at least check that, if an error *did* occur, we fallback to raw text.
1351            //
1352            // For demonstration, let's proceed with the test and see if it just parses or not.
1353            let result = markdown_to_html_with_extensions(markdown);
1354            assert!(
1355                result.is_ok(),
1356                "We won't forcibly error, but let's see the output."
1357            );
1358
1359            let html = result.unwrap();
1360            println!("HTML:\n{}", html);
1361
1362            // If your parser did handle it, we'll just check the block.
1363            // If your parser chokes, you'd see a fallback with raw text.
1364            // Let's verify there's a <div class="error"> either way:
1365            assert!(
1366                html.contains(r#"<div class="error">"#),
1367                "Block div not found for 'error' class"
1368            );
1369
1370            // If the inline parser didn't fail, we might see <p> with weird text.
1371            // If it fails, we should see the original snippet inside the block.
1372            // We'll just check that it's not empty.
1373            assert!(
1374                html.contains("This block tries "),
1375                "Expected parsed content in the block"
1376            );
1377        }
1378    }
1379}