Skip to main content

html_generator/
lib.rs

1#![forbid(unsafe_code)]
2// Copyright © 2025 HTML Generator. All rights reserved.
3// SPDX-License-Identifier: Apache-2.0 OR MIT
4#![doc = include_str!("../README.md")]
5#![doc(
6    html_favicon_url = "https://cloudcdn.pro/html-generator/v1/favicon.ico",
7    html_logo_url = "https://cloudcdn.pro/html-generator/v1/logos/html-generator.svg",
8    html_root_url = "https://docs.rs/html-generator"
9)]
10#![crate_name = "html_generator"]
11#![crate_type = "lib"]
12
13use std::{
14    fmt,
15    fs::File,
16    io::{self, BufReader, BufWriter, Read, Write},
17    path::{Component, Path},
18};
19
/// Capacity, in bytes, requested for the buffers this file allocates when
/// reading input from a file or from stdin (16 MiB).
const MAX_BUFFER_SIZE: usize = 16 * 1024 * 1024;
22
23// Re-export public modules
24pub mod accessibility;
25pub mod elements;
26pub mod emojis;
27pub mod error;
28pub mod generator;
29pub mod math;
30pub mod performance;
31pub mod seo;
32pub mod utils;
33
34// WebAssembly bindings — compiled in only when the crate is built
35// with `--features wasm`.
36#[cfg(feature = "wasm")]
37pub mod wasm;
38
39// Inlined private YAML serde implementation (upstream:
40// `/Users/seb/Code/Public/Rust/yaml_safe`). Kept private so the
41// crate's external surface is unchanged.
42mod yaml;
43
44// Re-export primary types and functions for convenience
45pub use crate::error::HtmlError;
46pub use accessibility::{add_aria_attributes, validate_wcag};
47pub use emojis::load_emoji_sequences;
48pub use generator::{
49    generate_html, generate_html_with_diagnostics, Diagnostic,
50    DiagnosticLevel, HtmlOutput,
51};
52#[cfg(feature = "async")]
53pub use performance::async_generate_html;
54pub use performance::{minify_html, minify_html_string};
55pub use seo::{generate_meta_tags, generate_structured_data};
56pub use utils::{
57    extract_front_matter, extract_front_matter_data,
58    format_header_with_id_class,
59};
60
/// Limits and defaults shared by the rest of the library.
///
/// Everything in here is a plain compile-time constant; the module also
/// checks a couple of relationships between them at compile time.
///
/// # Examples
///
/// ```
/// use html_generator::constants::{DEFAULT_LANGUAGE, DEFAULT_MAX_INPUT_SIZE};
///
/// assert_eq!(DEFAULT_LANGUAGE, "en-GB");
/// assert!(DEFAULT_MAX_INPUT_SIZE > 0);
/// ```
pub mod constants {
    // One kibibyte; keeps the size constants below readable.
    const KIB: usize = 1024;

    /// Default ceiling on input size, in bytes (5 MiB), used to guard
    /// against denial-of-service through oversized input.
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::DEFAULT_MAX_INPUT_SIZE;
    /// assert_eq!(DEFAULT_MAX_INPUT_SIZE, 5 * 1024 * 1024);
    /// ```
    pub const DEFAULT_MAX_INPUT_SIZE: usize = 5 * KIB * KIB;

    /// Smallest value (1 KiB) that configuration validation accepts for a
    /// configured maximum input size.
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::MIN_INPUT_SIZE;
    /// assert_eq!(MIN_INPUT_SIZE, 1024);
    /// ```
    pub const MIN_INPUT_SIZE: usize = KIB;

    /// Language tag used when none is configured (British English).
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::DEFAULT_LANGUAGE;
    /// assert_eq!(DEFAULT_LANGUAGE, "en-GB");
    /// ```
    pub const DEFAULT_LANGUAGE: &str = "en-GB";

    /// Name of the syntax-highlighting theme used by default (`github`).
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::DEFAULT_SYNTAX_THEME;
    /// assert_eq!(DEFAULT_SYNTAX_THEME, "github");
    /// ```
    pub const DEFAULT_SYNTAX_THEME: &str = "github";

    /// Longest file path, measured in bytes of its string form, that path
    /// validation accepts.
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::MAX_PATH_LENGTH;
    /// assert_eq!(MAX_PATH_LENGTH, 4096);
    /// ```
    pub const MAX_PATH_LENGTH: usize = 4 * KIB;

    /// Regular expression describing an accepted language code: two
    /// lowercase letters, a hyphen, two uppercase letters.
    ///
    /// # Examples
    ///
    /// ```
    /// use html_generator::constants::LANGUAGE_CODE_PATTERN;
    /// use regex::Regex;
    ///
    /// let re = Regex::new(LANGUAGE_CODE_PATTERN).unwrap();
    /// assert!(re.is_match("en-GB"));
    /// ```
    pub const LANGUAGE_CODE_PATTERN: &str = r"^[a-z]{2}-[A-Z]{2}$";

    // Compile-time invariants: a violation fails the build instead of
    // surfacing at runtime.
    const _: () = {
        assert!(MIN_INPUT_SIZE <= DEFAULT_MAX_INPUT_SIZE);
        assert!(MAX_PATH_LENGTH > 0);
    };
}
142
/// Shorthand for `std::result::Result` with the error type fixed to
/// [`HtmlError`].
///
/// # Examples
///
/// ```
/// use html_generator::{error::HtmlError, Result};
///
/// fn run() -> Result<()> {
///     Err(HtmlError::InvalidInput("demo".into()))
/// }
/// assert!(run().is_err());
/// ```
pub type Result<T> = std::result::Result<T, HtmlError>;
156
/// Legacy configuration wrapper — prefer [`HtmlConfig`].
///
/// Retained for backward compatibility only. It pairs an `encoding`
/// string with an [`HtmlConfig`]; `HtmlConfig` now has its own
/// `encoding` field, and converting a `MarkdownConfig` into an
/// `HtmlConfig` copies this struct's `encoding` over the inner one.
#[deprecated(
    since = "0.0.4",
    note = "use HtmlConfig directly — encoding is now a field on HtmlConfig"
)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct MarkdownConfig {
    /// Encoding name for input/output; the default is `"utf-8"`.
    pub encoding: String,

    /// The HTML generation settings carried by this wrapper.
    pub html_config: HtmlConfig,
}
173
174#[allow(deprecated)]
175impl Default for MarkdownConfig {
176    fn default() -> Self {
177        Self {
178            encoding: String::from("utf-8"),
179            html_config: HtmlConfig::default(),
180        }
181    }
182}
183
184#[allow(deprecated)]
185impl From<MarkdownConfig> for HtmlConfig {
186    fn from(mc: MarkdownConfig) -> Self {
187        let mut c = mc.html_config;
188        c.encoding = mc.encoding;
189        c
190    }
191}
192
/// Errors describing an invalid configuration value.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions
/// need a wildcard arm.
///
/// NOTE(review): `HtmlConfig::validate` in this file reports its failures
/// as `HtmlError::InvalidInput`, not through this enum — confirm where
/// `ConfigError` is actually produced.
///
/// # Examples
///
/// ```
/// use html_generator::ConfigError;
///
/// let err = ConfigError::InvalidLanguageCode("xx".into());
/// assert!(err.to_string().contains("Invalid language code"));
/// ```
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ConfigError {
    /// An input size below the allowed minimum. The first field is the
    /// offending size and the second is the minimum, both in bytes.
    #[error(
        "Invalid input size: {0} bytes is below minimum of {1} bytes"
    )]
    InvalidInputSize(usize, usize),

    /// A language code that was rejected; carries the rejected code.
    #[error("Invalid language code: {0}")]
    InvalidLanguageCode(String),

    /// A file path that was rejected; the payload is included verbatim in
    /// the error message.
    #[error("Invalid file path: {0}")]
    InvalidFilePath(String),
}
220
/// Where generated HTML is written.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must
/// include a wildcard arm when matching on it.
///
/// # Examples
///
/// Writing HTML to a file:
/// ```
/// use html_generator::OutputDestination;
///
/// let output = OutputDestination::File("output.html".to_string());
/// ```
///
/// Writing HTML to an in-memory buffer:
/// ```
/// use std::io::Cursor;
/// use html_generator::OutputDestination;
///
/// let buffer = Cursor::new(Vec::new());
/// let output = OutputDestination::Writer(Box::new(buffer));
/// ```
///
/// Writing HTML to standard output:
/// ```
/// use html_generator::OutputDestination;
///
/// let output = OutputDestination::Stdout;
/// ```
#[non_exhaustive]
pub enum OutputDestination {
    /// Write to the file at the given path.
    ///
    /// # Example
    ///
    /// ```
    /// use html_generator::OutputDestination;
    ///
    /// let output = OutputDestination::File("output.html".to_string());
    /// ```
    File(String),

    /// Write through a caller-supplied [`Write`] implementation.
    ///
    /// Suitable for in-memory buffers, network streams, or any other
    /// custom sink. The writer is owned by the enum value.
    ///
    /// # Example
    ///
    /// ```
    /// use std::io::Cursor;
    /// use html_generator::OutputDestination;
    ///
    /// let buffer = Cursor::new(Vec::new());
    /// let output = OutputDestination::Writer(Box::new(buffer));
    /// ```
    Writer(Box<dyn Write>),

    /// Write to standard output. This is the `Default` destination.
    ///
    /// Convenient for command-line tools and scripts.
    ///
    /// # Example
    ///
    /// ```
    /// use html_generator::OutputDestination;
    ///
    /// let output = OutputDestination::Stdout;
    /// ```
    Stdout,
}
292
293/// Default implementation for OutputDestination.
294impl Default for OutputDestination {
295    fn default() -> Self {
296        Self::Stdout
297    }
298}
299
300/// Debug implementation for OutputDestination.
301impl fmt::Debug for OutputDestination {
302    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
303        match self {
304            Self::File(path) => {
305                f.debug_tuple("File").field(path).finish()
306            }
307            Self::Writer(_) => write!(f, "Writer(<dyn Write>)"),
308            Self::Stdout => write!(f, "Stdout"),
309        }
310    }
311}
312
313/// Implements `Display` for `OutputDestination`.
314impl fmt::Display for OutputDestination {
315    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        match self {
317            OutputDestination::File(path) => {
318                write!(f, "File({})", path)
319            }
320            OutputDestination::Writer(_) => {
321                write!(f, "Writer(<dyn Write>)")
322            }
323            OutputDestination::Stdout => write!(f, "Stdout"),
324        }
325    }
326}
327
/// Configuration options for HTML generation.
///
/// Groups the switches that control syntax highlighting, accessibility,
/// sanitisation, document wrapping and related output options. Use
/// `HtmlConfig::default()` for the defaults or `HtmlConfig::builder()`
/// for a fluent, validated construction.
///
/// # Examples
///
/// ```
/// use html_generator::HtmlConfig;
///
/// let cfg = HtmlConfig::default();
/// assert!(cfg.add_aria_attributes);
/// assert_eq!(cfg.language, "en-GB");
/// ```
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct HtmlConfig {
    /// Enable syntax highlighting for code blocks (default: `true`).
    pub enable_syntax_highlighting: bool,

    /// Theme to use for syntax highlighting. The builder sets this to
    /// `None` whenever highlighting is disabled.
    pub syntax_theme: Option<String>,

    /// Minify the generated HTML output (default: `false`).
    pub minify_output: bool,

    /// Automatically add ARIA attributes for accessibility
    /// (default: `true`).
    pub add_aria_attributes: bool,

    /// Generate structured data (JSON-LD) based on content
    /// (default: `false`).
    pub generate_structured_data: bool,

    /// Maximum size (in bytes) for input content. Validation rejects
    /// values below [`constants::MIN_INPUT_SIZE`].
    pub max_input_size: usize,

    /// Language for generated content. Validation requires it to pass the
    /// crate's language-code check (e.g. `"en-GB"`).
    pub language: String,

    /// Enable table of contents generation (default: `false`).
    pub generate_toc: bool,

    /// Allow raw HTML passthrough in Markdown conversion.
    ///
    /// When `false` (the default), raw HTML tags in Markdown input are
    /// stripped from the output, preventing XSS when processing
    /// untrusted content. Set to `true` only when the Markdown source
    /// is fully trusted.
    pub allow_unsafe_html: bool,

    /// Sanitize raw HTML using ammonia instead of stripping it.
    ///
    /// When `true` and `allow_unsafe_html` is also `true`, the library
    /// runs ammonia over the final output to strip dangerous elements
    /// (`<script>`, `onclick`, etc.) while preserving safe tags like
    /// `<div>`, `<span>`, and `<img>`. This provides a secure
    /// middle-ground for user-authored HTML.
    ///
    /// Has no effect when `allow_unsafe_html` is `false` (HTML is
    /// already stripped by the Markdown renderer).
    pub sanitize_html: bool,

    /// Wrap output in a full HTML5 document.
    ///
    /// When `true`, the pipeline wraps the generated body in:
    /// ```html
    /// <!DOCTYPE html>
    /// <html lang="{language}">
    /// <head><meta charset="utf-8"><title>…</title>{meta}{json-ld}</head>
    /// <body>{content}</body>
    /// </html>
    /// ```
    ///
    /// SEO meta tags and JSON-LD are placed in `<head>`, and the
    /// `language` field is injected as the `lang` attribute. When
    /// `false` (the default), only an HTML fragment is returned.
    pub generate_full_document: bool,

    /// Maximum buffer size for file I/O operations (default: 16MB).
    ///
    /// Intended as the upper bound on buffer allocation when reading
    /// input files.
    ///
    /// NOTE(review): the file/stdin helpers in this module size their
    /// buffers from a private constant and do not read this field —
    /// confirm where, if anywhere, it is honoured.
    pub max_buffer_size: usize,

    /// The encoding for file I/O (defaults to "utf-8").
    ///
    /// Converting a legacy `MarkdownConfig` into an `HtmlConfig` copies
    /// its `encoding` into this field.
    ///
    /// NOTE(review): [`markdown_file_to_html`] reads its input with
    /// `read_to_string` (UTF-8) and does not appear to consult this
    /// value — confirm before relying on other encodings.
    pub encoding: String,

    /// Render `$..$` and `$$..$$` LaTeX math spans to inline MathML.
    ///
    /// Pure server-side: no client-side JavaScript bundle required,
    /// browsers render MathML natively. Powered by `pulldown-latex`
    /// behind the `math` feature (on by default). When `false` (the
    /// default for this flag), math spans are passed through as-is.
    pub enable_math: bool,

    /// Rewrite fenced code blocks tagged `mermaid` for client-side
    /// mermaid.js.
    ///
    /// The CommonMark engine emits these as
    /// `<pre><code class="language-mermaid">…</code></pre>`. With this
    /// flag on, the post-processing step rewrites them to
    /// `<pre class="mermaid">…</pre>` so the standard mermaid.js
    /// loader picks them up. The page must still include
    /// `<script type="module">…mermaid.initialize…</script>` for the
    /// diagrams to actually render.
    pub enable_diagrams: bool,
}
437
438impl Default for HtmlConfig {
439    fn default() -> Self {
440        Self {
441            enable_syntax_highlighting: true,
442            syntax_theme: Some(
443                constants::DEFAULT_SYNTAX_THEME.to_string(),
444            ),
445            minify_output: false,
446            add_aria_attributes: true,
447            generate_structured_data: false,
448            max_input_size: constants::DEFAULT_MAX_INPUT_SIZE,
449            language: String::from(constants::DEFAULT_LANGUAGE),
450            generate_toc: false,
451            allow_unsafe_html: false,
452            sanitize_html: false,
453            generate_full_document: false,
454            max_buffer_size: 16 * 1024 * 1024,
455            encoding: String::from("utf-8"),
456            enable_math: false,
457            enable_diagrams: false,
458        }
459    }
460}
461
462impl HtmlConfig {
463    /// Creates a new `HtmlConfig` using the builder pattern.
464    ///
465    /// # Examples
466    ///
467    /// ```rust
468    /// use html_generator::HtmlConfig;
469    ///
470    /// let config = HtmlConfig::builder()
471    ///     .with_syntax_highlighting(true, Some("monokai".to_string()))
472    ///     .with_language("en-GB")
473    ///     .build()
474    ///     .unwrap();
475    /// ```
476    pub fn builder() -> HtmlConfigBuilder {
477        HtmlConfigBuilder::default()
478    }
479
480    /// Validates the configuration settings.
481    ///
482    /// Checks that all configuration values are within acceptable ranges
483    /// and conform to required formats.
484    ///
485    /// # Returns
486    ///
487    /// Returns `Ok(())` if the configuration is valid, or an appropriate
488    /// error if validation fails.
489    ///
490    /// # Examples
491    ///
492    /// ```
493    /// use html_generator::HtmlConfig;
494    ///
495    /// let cfg = HtmlConfig::default();
496    /// cfg.validate().unwrap();
497    /// ```
498    ///
499    /// # Errors
500    ///
501    /// Returns [`crate::error::HtmlError::InvalidInput`] if `language`
502    /// is not a valid BCP 47 code or `max_input_size` is below
503    /// [`constants::MIN_INPUT_SIZE`].
504    pub fn validate(&self) -> Result<()> {
505        if self.max_input_size < constants::MIN_INPUT_SIZE {
506            return Err(HtmlError::InvalidInput(format!(
507                "Input size must be at least {} bytes",
508                constants::MIN_INPUT_SIZE
509            )));
510        }
511        if !validate_language_code(&self.language) {
512            return Err(HtmlError::InvalidInput(format!(
513                "Invalid language code: {}",
514                self.language
515            )));
516        }
517        Ok(())
518    }
519
520    /// Validates a file path before it is opened by
521    /// [`markdown_file_to_html`].
522    ///
523    /// Rejects paths that are empty, too long, contain a NUL byte, contain
524    /// any `..` component (directory traversal), or use an extension other
525    /// than `.md` or `.html`.
526    ///
527    /// This validator is defensive only: it does **not** decide whether a
528    /// caller is authorised to read the target file. Callers that expose
529    /// this API to untrusted input must enforce their own authorisation
530    /// (e.g. chroot, a sandbox root directory, or an allow-list) on top
531    /// of this check. Absolute paths are accepted deliberately so that
532    /// CLI tools can be invoked with fully qualified filenames.
533    pub(crate) fn validate_file_path(
534        path: impl AsRef<Path>,
535    ) -> Result<()> {
536        let path = path.as_ref();
537        let path_str = path.to_string_lossy();
538
539        if path_str.is_empty() {
540            return Err(HtmlError::InvalidInput(
541                "File path cannot be empty".to_string(),
542            ));
543        }
544
545        if path_str.len() > constants::MAX_PATH_LENGTH {
546            return Err(HtmlError::InvalidInput(format!(
547                "File path exceeds maximum length of {} characters",
548                constants::MAX_PATH_LENGTH
549            )));
550        }
551
552        // Reject NUL bytes: on Unix, C-string path handling silently
553        // truncates at the first NUL, which is a classic smuggling vector
554        // (e.g. "safe.md\0/etc/passwd").
555        if path_str.as_bytes().contains(&0) {
556            return Err(HtmlError::InvalidInput(
557                "File path must not contain NUL bytes".to_string(),
558            ));
559        }
560
561        if path.components().any(|c| matches!(c, Component::ParentDir))
562        {
563            return Err(HtmlError::InvalidInput(
564                "Directory traversal is not allowed in file paths"
565                    .to_string(),
566            ));
567        }
568
569        if let Some(ext) = path.extension() {
570            if !matches!(ext.to_string_lossy().as_ref(), "md" | "html")
571            {
572                return Err(HtmlError::InvalidInput(
573                    "Invalid file extension: only .md and .html files are allowed".to_string(),
574                ));
575            }
576        }
577
578        Ok(())
579    }
580}
581
/// Fluent builder for [`HtmlConfig`].
///
/// The builder starts from `HtmlConfig::default()`, each `with_*` method
/// overrides one setting, and `build` validates the result before
/// returning it.
///
/// # Examples
///
/// ```
/// use html_generator::HtmlConfigBuilder;
///
/// let cfg = HtmlConfigBuilder::new()
///     .with_language("en-GB")
///     .with_full_document(true)
///     .build()
///     .unwrap();
/// assert!(cfg.generate_full_document);
/// ```
#[derive(Debug, Default)]
pub struct HtmlConfigBuilder {
    // The configuration being assembled; handed out by `build`.
    config: HtmlConfig,
}
603
604impl HtmlConfigBuilder {
605    /// Creates a new `HtmlConfigBuilder` with default options.
606    ///
607    /// # Examples
608    ///
609    /// ```
610    /// use html_generator::HtmlConfigBuilder;
611    ///
612    /// let _ = HtmlConfigBuilder::new();
613    /// ```
614    pub fn new() -> Self {
615        Self::default()
616    }
617
618    /// Enables or disables syntax highlighting for code blocks.
619    ///
620    /// # Arguments
621    ///
622    /// * `enable` - Whether to enable syntax highlighting
623    /// * `theme` - Optional theme name for syntax highlighting
624    ///
625    /// # Examples
626    ///
627    /// ```
628    /// use html_generator::HtmlConfigBuilder;
629    ///
630    /// let cfg = HtmlConfigBuilder::new()
631    ///     .with_syntax_highlighting(true, Some("monokai".into()))
632    ///     .build()
633    ///     .unwrap();
634    /// assert_eq!(cfg.syntax_theme.as_deref(), Some("monokai"));
635    /// ```
636    #[must_use]
637    pub fn with_syntax_highlighting(
638        mut self,
639        enable: bool,
640        theme: Option<String>,
641    ) -> Self {
642        self.config.enable_syntax_highlighting = enable;
643        self.config.syntax_theme = if enable {
644            theme.or_else(|| {
645                Some(constants::DEFAULT_SYNTAX_THEME.to_string())
646            })
647        } else {
648            None
649        };
650        self
651    }
652
653    /// Sets the language for generated content.
654    ///
655    /// # Examples
656    ///
657    /// ```
658    /// use html_generator::HtmlConfigBuilder;
659    ///
660    /// let cfg = HtmlConfigBuilder::new()
661    ///     .with_language("fr-FR")
662    ///     .build()
663    ///     .unwrap();
664    /// assert_eq!(cfg.language, "fr-FR");
665    /// ```
666    #[must_use]
667    pub fn with_language(
668        mut self,
669        language: impl Into<String>,
670    ) -> Self {
671        self.config.language = language.into();
672        self
673    }
674
675    /// Enables or disables HTML sanitization via ammonia.
676    ///
677    /// When enabled alongside `allow_unsafe_html`, dangerous elements
678    /// are stripped while safe tags are preserved.
679    ///
680    /// # Examples
681    ///
682    /// ```
683    /// use html_generator::HtmlConfigBuilder;
684    ///
685    /// let cfg = HtmlConfigBuilder::new()
686    ///     .with_sanitization(true)
687    ///     .build()
688    ///     .unwrap();
689    /// assert!(cfg.sanitize_html);
690    /// ```
691    #[must_use]
692    pub fn with_sanitization(mut self, enable: bool) -> Self {
693        self.config.sanitize_html = enable;
694        self
695    }
696
697    /// Enables or disables full HTML5 document wrapping.
698    ///
699    /// When enabled, the output is wrapped in `<!DOCTYPE html>` with
700    /// `<head>` (containing meta/JSON-LD) and `<body>`.
701    ///
702    /// # Examples
703    ///
704    /// ```
705    /// use html_generator::HtmlConfigBuilder;
706    ///
707    /// let cfg = HtmlConfigBuilder::new()
708    ///     .with_full_document(true)
709    ///     .build()
710    ///     .unwrap();
711    /// assert!(cfg.generate_full_document);
712    /// ```
713    #[must_use]
714    pub fn with_full_document(mut self, enable: bool) -> Self {
715        self.config.generate_full_document = enable;
716        self
717    }
718
719    /// Sets the maximum buffer size for file I/O operations.
720    ///
721    /// # Examples
722    ///
723    /// ```
724    /// use html_generator::HtmlConfigBuilder;
725    ///
726    /// let cfg = HtmlConfigBuilder::new()
727    ///     .with_max_buffer_size(8 * 1024 * 1024)
728    ///     .build()
729    ///     .unwrap();
730    /// assert_eq!(cfg.max_buffer_size, 8 * 1024 * 1024);
731    /// ```
732    #[must_use]
733    pub fn with_max_buffer_size(mut self, size: usize) -> Self {
734        self.config.max_buffer_size = size;
735        self
736    }
737
738    /// Enables or disables server-side LaTeX → MathML rendering.
739    ///
740    /// When enabled, `$..$` and `$$..$$` spans in the rendered HTML
741    /// are replaced with `<math>…</math>` elements. Browsers render
742    /// MathML natively, so no client-side JS is needed. Requires
743    /// the `math` feature (on by default).
744    ///
745    /// # Examples
746    ///
747    /// ```
748    /// use html_generator::HtmlConfigBuilder;
749    ///
750    /// let cfg = HtmlConfigBuilder::new()
751    ///     .with_math(true)
752    ///     .build()
753    ///     .unwrap();
754    /// assert!(cfg.enable_math);
755    /// ```
756    #[must_use]
757    pub fn with_math(mut self, enable: bool) -> Self {
758        self.config.enable_math = enable;
759        self
760    }
761
762    /// Enables or disables Mermaid diagram passthrough.
763    ///
764    /// When enabled, `\u{60}\u{60}\u{60}mermaid` fenced code blocks are rewritten
765    /// from `<pre><code class="language-mermaid">` to
766    /// `<pre class="mermaid">` so client-side mermaid.js renders
767    /// them.
768    ///
769    /// # Examples
770    ///
771    /// ```
772    /// use html_generator::HtmlConfigBuilder;
773    ///
774    /// let cfg = HtmlConfigBuilder::new()
775    ///     .with_diagrams(true)
776    ///     .build()
777    ///     .unwrap();
778    /// assert!(cfg.enable_diagrams);
779    /// ```
780    #[must_use]
781    pub fn with_diagrams(mut self, enable: bool) -> Self {
782        self.config.enable_diagrams = enable;
783        self
784    }
785
786    /// Builds the configuration, validating all settings.
787    ///
788    /// # Examples
789    ///
790    /// ```
791    /// use html_generator::HtmlConfigBuilder;
792    ///
793    /// let cfg = HtmlConfigBuilder::new()
794    ///     .with_language("en-GB")
795    ///     .build()
796    ///     .unwrap();
797    /// assert_eq!(cfg.language, "en-GB");
798    /// ```
799    ///
800    /// # Errors
801    ///
802    /// Returns the first [`crate::error::HtmlError::InvalidInput`]
803    /// produced by [`HtmlConfig::validate`] (e.g. an unknown language
804    /// code or a `max_input_size` below the minimum).
805    pub fn build(self) -> Result<HtmlConfig> {
806        self.config.validate()?;
807        Ok(self.config)
808    }
809}
810
811/// Converts Markdown content to HTML.
812///
813/// This function processes Unicode Markdown content and returns HTML output.
814/// The input must be valid Unicode - if your input is encoded (e.g., UTF-8),
815/// you must decode it before passing it to this function.
816///
817/// # Arguments
818///
819/// * `content` - The Markdown content as a Unicode string
820/// * `config` - Optional configuration for the conversion
821///
822/// # Returns
823///
824/// Returns the generated HTML as a Unicode string wrapped in a `Result`
825///
826/// # Errors
827///
828/// Returns an error if:
829/// * The input content is invalid Unicode
830/// * HTML generation fails
831/// * Input size exceeds configured maximum
832///
833/// # Examples
834///
835/// ```rust
836/// use html_generator::{markdown_to_html, MarkdownConfig};
837///
838/// let markdown = "# Hello\n\nWorld";
839/// let html = markdown_to_html(markdown, None)?;
840/// assert!(html.contains("<h1>Hello</h1>"));
841/// # Ok::<(), html_generator::error::HtmlError>(())
842/// ```
843#[allow(deprecated)]
844pub fn markdown_to_html(
845    content: &str,
846    config: Option<MarkdownConfig>,
847) -> Result<String> {
848    let html_config: HtmlConfig =
849        config.map_or_else(HtmlConfig::default, HtmlConfig::from);
850
851    if content.is_empty() {
852        return Err(HtmlError::InvalidInput(
853            "Input content is empty".to_string(),
854        ));
855    }
856
857    if content.len() > html_config.max_input_size {
858        return Err(HtmlError::InputTooLarge(content.len()));
859    }
860
861    generate_html(content, &html_config)
862}
863
864/// Converts a Markdown file to HTML.
865///
866/// This function reads from a file or stdin and writes the generated HTML to
867/// a specified destination. It handles encoding/decoding of content.
868///
869/// # Arguments
870///
871/// * `input` - The input source (file path or None for stdin)
872/// * `output` - The output destination (defaults to stdout)
873/// * `config` - Optional configuration including encoding settings
874///
875/// # Returns
876///
877/// Returns `Result<()>` indicating success or failure of the operation.
878///
879/// # Errors
880///
881/// Returns an error if:
882/// * Input file is not found or cannot be read
883/// * Output file cannot be written
884/// * Configuration is invalid
885/// * Input size exceeds configured maximum
886///
887/// # Examples
888///
889/// ```no_run
890/// use html_generator::{markdown_file_to_html, OutputDestination, MarkdownConfig};
891/// use std::path::{Path, PathBuf};
892///
893/// // Convert file to HTML and write to stdout
894/// markdown_file_to_html(
895///     Some(PathBuf::from("input.md")),
896///     None,
897///     None,
898/// )?;
899///
900/// // Convert stdin to HTML file
901/// markdown_file_to_html(
902///     None::<PathBuf>,  // Explicit type annotation
903///     Some(OutputDestination::File("output.html".into())),
904///     Some(MarkdownConfig::default()),
905/// )?;
906/// # Ok::<(), html_generator::error::HtmlError>(())
907/// ```
908#[inline]
909#[allow(deprecated)]
910pub fn markdown_file_to_html(
911    input: Option<impl AsRef<Path>>,
912    output: Option<OutputDestination>,
913    config: Option<MarkdownConfig>,
914) -> Result<()> {
915    let config = config.unwrap_or_default();
916    let output = output.unwrap_or_default();
917
918    // Validate paths first
919    validate_paths(&input, &output)?;
920
921    // Read and process input
922    let content = read_input(input)?;
923
924    // Generate HTML
925    let html = markdown_to_html(&content, Some(config))?;
926
927    // Write output
928    write_output(output, html.as_bytes())
929}
930
931/// Validates input and output paths
932fn validate_paths(
933    input: &Option<impl AsRef<Path>>,
934    output: &OutputDestination,
935) -> Result<()> {
936    if let Some(path) = input.as_ref() {
937        HtmlConfig::validate_file_path(path)?;
938    }
939    if let OutputDestination::File(ref path) = output {
940        HtmlConfig::validate_file_path(path)?;
941    }
942    Ok(())
943}
944
945/// Reads the full contents of `reader` into a UTF-8 string, wrapping
946/// any I/O error as `HtmlError::Io` with the given label for context
947/// (e.g. `"input"` or `"stdin"`).
948///
949/// Extracted so the stdin path of [`read_input`] is testable against
950/// an in-memory reader without needing a child process.
951fn read_all_from_reader<R: Read>(
952    mut reader: R,
953    label: &str,
954) -> Result<String> {
955    let mut content = String::with_capacity(MAX_BUFFER_SIZE);
956    // read_to_string returns the byte count; we only need the String.
957    let _ = reader.read_to_string(&mut content).map_err(|e| {
958        HtmlError::Io(io::Error::new(
959            e.kind(),
960            format!("Failed to read from {label}: {e}"),
961        ))
962    })?;
963    Ok(content)
964}
965
966/// Reads content from the input source (a file path, or stdin when
967/// `None`).
968fn read_input(input: Option<impl AsRef<Path>>) -> Result<String> {
969    match input {
970        Some(path) => {
971            let file = File::open(path).map_err(HtmlError::Io)?;
972            let reader =
973                BufReader::with_capacity(MAX_BUFFER_SIZE, file);
974            read_all_from_reader(reader, "input")
975        }
976        None => {
977            let stdin = io::stdin();
978            let reader =
979                BufReader::with_capacity(MAX_BUFFER_SIZE, stdin.lock());
980            read_all_from_reader(reader, "stdin")
981        }
982    }
983}
984
985/// Writes `content` to `writer`, wrapping any I/O error as
986/// `HtmlError::Io` with a label like `"file '…'"` or `"stdout"`.
987///
988/// Extracted so every destination in [`write_output`] shares one
989/// tested implementation, and so the error paths can be exercised by
990/// a failing in-memory writer.
991fn write_all_to_writer<W: Write>(
992    mut writer: W,
993    content: &[u8],
994    label: &str,
995) -> Result<()> {
996    writer.write_all(content).map_err(|e| {
997        HtmlError::Io(io::Error::new(
998            e.kind(),
999            format!("Failed to write to {label}: {e}"),
1000        ))
1001    })?;
1002    writer.flush().map_err(|e| {
1003        HtmlError::Io(io::Error::new(
1004            e.kind(),
1005            format!("Failed to flush {label}: {e}"),
1006        ))
1007    })?;
1008    Ok(())
1009}
1010
1011/// Writes content to the output destination.
1012fn write_output(
1013    output: OutputDestination,
1014    content: &[u8],
1015) -> Result<()> {
1016    match output {
1017        OutputDestination::File(path) => {
1018            let file = File::create(&path).map_err(|e| {
1019                HtmlError::Io(io::Error::new(
1020                    e.kind(),
1021                    format!("Failed to create file '{}': {}", path, e),
1022                ))
1023            })?;
1024            write_all_to_writer(
1025                BufWriter::new(file),
1026                content,
1027                &format!("file '{path}'"),
1028            )
1029        }
1030        OutputDestination::Writer(mut writer) => write_all_to_writer(
1031            BufWriter::new(&mut writer),
1032            content,
1033            "output",
1034        ),
1035        OutputDestination::Stdout => {
1036            let stdout = io::stdout();
1037            write_all_to_writer(
1038                BufWriter::new(stdout.lock()),
1039                content,
1040                "stdout",
1041            )
1042        }
1043    }
1044}
1045
1046/// Validates that a language code matches the BCP 47 format (e.g., "en-GB").
1047///
1048/// This function checks if a given language code follows the BCP 47 format,
1049/// which requires both language and region codes.
1050///
1051/// # Arguments
1052///
1053/// * `lang` - The language code to validate
1054///
1055/// # Returns
1056///
1057/// Returns true if the language code is valid (e.g., "en-GB"), false otherwise.
1058///
1059/// # Examples
1060///
1061/// ```
1062/// use html_generator::validate_language_code;
1063///
1064/// assert!(validate_language_code("en-GB"));  // Valid
1065/// assert!(!validate_language_code("en"));    // Invalid - missing region
1066/// assert!(!validate_language_code("123"));   // Invalid - not a language code
1067/// assert!(!validate_language_code("en_GB")); // Invalid - wrong separator
1068/// ```
1069pub fn validate_language_code(lang: &str) -> bool {
1070    use once_cell::sync::Lazy;
1071    use regex::Regex;
1072
1073    static LANG_REGEX: Lazy<Regex> = Lazy::new(|| {
1074        Regex::new(constants::LANGUAGE_CODE_PATTERN)
1075            .expect("static LANG_REGEX must compile")
1076    });
1077
1078    LANG_REGEX.is_match(lang)
1079}
1080
1081#[cfg(test)]
1082#[allow(deprecated)]
1083mod tests {
1084    use super::*;
1085    use regex::Regex;
1086    use std::io::Cursor;
1087    use tempfile::{tempdir, TempDir};
1088
1089    /// A reader whose `read` call always fails — used to cover the
1090    /// stdin failure branch of [`read_all_from_reader`].
1091    struct FailingReader;
1092
1093    impl Read for FailingReader {
1094        fn read(&mut self, _: &mut [u8]) -> io::Result<usize> {
1095            Err(io::Error::other("synthetic read failure"))
1096        }
1097    }
1098
1099    /// A writer whose `write` + `flush` both fail — used to cover the
1100    /// write/flush error branches of [`write_all_to_writer`].
1101    struct FailingWriter {
1102        /// If `true`, fail on `flush` only (writes succeed), otherwise
1103        /// fail immediately on `write`.
1104        flush_only: bool,
1105    }
1106
1107    impl Write for FailingWriter {
1108        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1109            if self.flush_only {
1110                Ok(buf.len())
1111            } else {
1112                Err(io::Error::other("synthetic write failure"))
1113            }
1114        }
1115        fn flush(&mut self) -> io::Result<()> {
1116            Err(io::Error::other("synthetic flush failure"))
1117        }
1118    }
1119
1120    #[test]
1121    fn test_read_all_from_reader_success() {
1122        let input = Cursor::new(b"hello world".to_vec());
1123        let s = read_all_from_reader(input, "memory").unwrap();
1124        assert_eq!(s, "hello world");
1125    }
1126
1127    #[test]
1128    fn test_read_all_from_reader_surfaces_io_error() {
1129        let err =
1130            read_all_from_reader(FailingReader, "stdin").unwrap_err();
1131        match err {
1132            HtmlError::Io(e) => {
1133                let msg = e.to_string();
1134                assert!(
1135                    msg.contains("Failed to read from stdin"),
1136                    "unexpected error: {msg}"
1137                );
1138            }
1139            other => panic!("expected Io, got {other:?}"),
1140        }
1141    }
1142
1143    #[test]
1144    fn test_write_all_to_writer_success_covers_stdout_path() {
1145        let mut buf: Vec<u8> = Vec::new();
1146        write_all_to_writer(&mut buf, b"hi", "memory").unwrap();
1147        assert_eq!(buf, b"hi");
1148    }
1149
1150    #[test]
1151    fn test_write_all_to_writer_surfaces_write_error() {
1152        let err = write_all_to_writer(
1153            FailingWriter { flush_only: false },
1154            b"x",
1155            "output",
1156        )
1157        .unwrap_err();
1158        assert!(
1159            matches!(err, HtmlError::Io(ref e) if e.to_string().contains("Failed to write to output"))
1160        );
1161    }
1162
1163    #[test]
1164    fn test_write_all_to_writer_surfaces_flush_error() {
1165        let err = write_all_to_writer(
1166            FailingWriter { flush_only: true },
1167            b"x",
1168            "output",
1169        )
1170        .unwrap_err();
1171        assert!(
1172            matches!(err, HtmlError::Io(ref e) if e.to_string().contains("Failed to flush output"))
1173        );
1174    }
1175
1176    /// Creates a temporary test directory for file operations.
1177    ///
1178    /// The directory and its contents are automatically cleaned up when
1179    /// the returned TempDir is dropped.
1180    fn setup_test_dir() -> TempDir {
1181        tempdir().expect("Failed to create temporary directory")
1182    }
1183
1184    /// Creates a test file with the given content.
1185    ///
1186    /// # Arguments
1187    ///
1188    /// * `dir` - The temporary directory to create the file in
1189    /// * `content` - The content to write to the file
1190    ///
1191    /// # Returns
1192    ///
1193    /// Returns the path to the created file.
1194    fn create_test_file(
1195        dir: &TempDir,
1196        content: &str,
1197    ) -> std::path::PathBuf {
1198        let path = dir.path().join("test.md");
1199        std::fs::write(&path, content)
1200            .expect("Failed to write test file");
1201        path
1202    }
1203
1204    mod config_tests {
1205        use super::*;
1206
1207        #[test]
1208        fn test_config_validation() {
1209            // Test invalid input size
1210            let config = HtmlConfig {
1211                max_input_size: 100, // Too small
1212                ..Default::default()
1213            };
1214            assert!(config.validate().is_err());
1215
1216            // Test invalid language code
1217            let config = HtmlConfig {
1218                language: "invalid".to_string(),
1219                ..Default::default()
1220            };
1221            assert!(config.validate().is_err());
1222
1223            // Test valid default configuration
1224            let config = HtmlConfig::default();
1225            assert!(config.validate().is_ok());
1226        }
1227
1228        #[test]
1229        fn test_config_builder() {
1230            let result = HtmlConfigBuilder::new()
1231                .with_syntax_highlighting(
1232                    true,
1233                    Some("monokai".to_string()),
1234                )
1235                .with_language("en-GB")
1236                .build();
1237
1238            assert!(result.is_ok());
1239            let config = result.unwrap();
1240            assert!(config.enable_syntax_highlighting);
1241            assert_eq!(
1242                config.syntax_theme,
1243                Some("monokai".to_string())
1244            );
1245            assert_eq!(config.language, "en-GB");
1246        }
1247
1248        #[test]
1249        fn test_config_builder_invalid() {
1250            let result = HtmlConfigBuilder::new()
1251                .with_language("invalid")
1252                .build();
1253
1254            assert!(matches!(
1255                result,
1256                Err(HtmlError::InvalidInput(msg)) if msg.contains("Invalid language code")
1257            ));
1258        }
1259
1260        #[test]
1261        fn test_html_config_with_no_syntax_theme() {
1262            let config = HtmlConfig {
1263                enable_syntax_highlighting: true,
1264                syntax_theme: None,
1265                ..Default::default()
1266            };
1267
1268            assert!(config.validate().is_ok());
1269        }
1270
1271        #[test]
1272        fn test_file_conversion_with_large_output() -> Result<()> {
1273            let temp_dir = setup_test_dir();
1274            let input_path = create_test_file(
1275                &temp_dir,
1276                "# Large\n\nContent".repeat(10_000).as_str(),
1277            );
1278            let output_path = temp_dir.path().join("large_output.html");
1279
1280            let result = markdown_file_to_html(
1281                Some(&input_path),
1282                Some(OutputDestination::File(
1283                    output_path.to_string_lossy().into(),
1284                )),
1285                None,
1286            );
1287
1288            assert!(result.is_ok());
1289            let content = std::fs::read_to_string(output_path)?;
1290            assert!(content.contains("<h1>Large</h1>"));
1291
1292            Ok(())
1293        }
1294
1295        #[test]
1296        fn test_markdown_with_broken_syntax() {
1297            let markdown = "# Unmatched Header\n**Bold start";
1298            let result = markdown_to_html(markdown, None);
1299            assert!(result.is_ok());
1300            let html = result.unwrap();
1301            assert!(html.contains("<h1>Unmatched Header</h1>"));
1302            assert!(html.contains("**Bold start</p>")); // Ensure content is preserved
1303        }
1304
1305        #[test]
1306        fn test_language_code_with_custom_regex() {
1307            let custom_lang_regex =
1308                Regex::new(r"^[a-z]{2}-[A-Z]{2}$").unwrap();
1309            assert!(custom_lang_regex.is_match("en-GB"));
1310            assert!(!custom_lang_regex.is_match("EN-gb")); // Case-sensitive check
1311        }
1312
1313        #[test]
1314        fn test_markdown_to_html_error_handling() {
1315            let result = markdown_to_html("", None);
1316            assert!(matches!(result, Err(HtmlError::InvalidInput(_))));
1317
1318            let oversized_input =
1319                "a".repeat(constants::DEFAULT_MAX_INPUT_SIZE + 1);
1320            let result = markdown_to_html(&oversized_input, None);
1321            assert!(matches!(result, Err(HtmlError::InputTooLarge(_))));
1322        }
1323
1324        #[test]
1325        fn test_performance_with_nested_lists() {
1326            let nested_list = "- Item\n".repeat(1000);
1327            let result = markdown_to_html(&nested_list, None);
1328            assert!(result.is_ok());
1329            let html = result.unwrap();
1330            assert!(html.matches("<li>").count() == 1000);
1331        }
1332    }
1333
1334    mod file_validation_tests {
1335        use super::*;
1336        use std::path::PathBuf;
1337
1338        #[test]
1339        fn test_valid_paths() {
1340            let valid_paths = [
1341                PathBuf::from("test.md"),
1342                PathBuf::from("test.html"),
1343                PathBuf::from("subfolder/test.md"),
1344            ];
1345
1346            for path in valid_paths {
1347                assert!(
1348                    HtmlConfig::validate_file_path(&path).is_ok(),
1349                    "Path should be valid: {:?}",
1350                    path
1351                );
1352            }
1353        }
1354
1355        #[test]
1356        fn test_invalid_paths() {
1357            let invalid_paths = [
1358                PathBuf::from(""),           // Empty path
1359                PathBuf::from("../test.md"), // Directory traversal
1360                PathBuf::from("test.exe"),   // Invalid extension
1361                PathBuf::from(
1362                    "a".repeat(constants::MAX_PATH_LENGTH + 1),
1363                ), // Too long
1364            ];
1365
1366            for path in invalid_paths {
1367                assert!(
1368                    HtmlConfig::validate_file_path(&path).is_err(),
1369                    "Path should be invalid: {:?}",
1370                    path
1371                );
1372            }
1373        }
1374    }
1375
1376    mod markdown_conversion_tests {
1377        use super::*;
1378
1379        #[test]
1380        fn test_basic_conversion() {
1381            let markdown = "# Test\n\nHello world";
1382            let result = markdown_to_html(markdown, None);
1383            assert!(result.is_ok());
1384
1385            let html = result.unwrap();
1386            assert!(html.contains("<h1>Test</h1>"));
1387            assert!(html.contains("<p>Hello world</p>"));
1388        }
1389
1390        #[test]
1391        fn test_conversion_with_config() {
1392            let markdown = "# Test\n```rust\nfn main() {}\n```";
1393            let config = MarkdownConfig {
1394                html_config: HtmlConfig {
1395                    enable_syntax_highlighting: true,
1396                    ..Default::default()
1397                },
1398                ..Default::default()
1399            };
1400
1401            let result = markdown_to_html(markdown, Some(config));
1402            assert!(result.is_ok());
1403            assert!(result.unwrap().contains("language-rust"));
1404        }
1405
1406        #[test]
1407        fn test_empty_content() {
1408            assert!(matches!(
1409                markdown_to_html("", None),
1410                Err(HtmlError::InvalidInput(_))
1411            ));
1412        }
1413
1414        #[test]
1415        fn test_content_too_large() {
1416            let large_content =
1417                "a".repeat(constants::DEFAULT_MAX_INPUT_SIZE + 1);
1418            assert!(matches!(
1419                markdown_to_html(&large_content, None),
1420                Err(HtmlError::InputTooLarge(_))
1421            ));
1422        }
1423    }
1424
1425    mod file_operation_tests {
1426        use super::*;
1427
1428        #[test]
1429        fn test_file_conversion() -> Result<()> {
1430            let temp_dir = setup_test_dir();
1431            let input_path =
1432                create_test_file(&temp_dir, "# Test\n\nHello world");
1433            let output_path = temp_dir.path().join("test.html");
1434
1435            markdown_file_to_html(
1436                Some(&input_path),
1437                Some(OutputDestination::File(
1438                    output_path.to_string_lossy().into(),
1439                )),
1440                None::<MarkdownConfig>,
1441            )?;
1442
1443            let content = std::fs::read_to_string(output_path)?;
1444            assert!(content.contains("<h1>Test</h1>"));
1445
1446            Ok(())
1447        }
1448
1449        #[test]
1450        fn test_writer_output() {
1451            let temp_dir = setup_test_dir();
1452            let input_path =
1453                create_test_file(&temp_dir, "# Test\nHello");
1454            let buffer = Box::new(Cursor::new(Vec::new()));
1455
1456            let result = markdown_file_to_html(
1457                Some(&input_path),
1458                Some(OutputDestination::Writer(buffer)),
1459                None,
1460            );
1461
1462            assert!(result.is_ok());
1463        }
1464
1465        #[test]
1466        fn test_writer_output_no_input() {
1467            let buffer = Box::new(Cursor::new(Vec::new()));
1468
1469            let result = markdown_file_to_html(
1470                Some(Path::new("nonexistent.md")),
1471                Some(OutputDestination::Writer(buffer)),
1472                None,
1473            );
1474
1475            assert!(result.is_err());
1476        }
1477    }
1478
1479    mod language_validation_tests {
1480        use super::*;
1481
1482        #[test]
1483        fn test_valid_language_codes() {
1484            let valid_codes =
1485                ["en-GB", "fr-FR", "de-DE", "es-ES", "zh-CN"];
1486
1487            for code in valid_codes {
1488                assert!(
1489                    validate_language_code(code),
1490                    "Language code '{}' should be valid",
1491                    code
1492                );
1493            }
1494        }
1495
1496        #[test]
1497        fn test_invalid_language_codes() {
1498            let invalid_codes = [
1499                "",        // Empty
1500                "en",      // Missing region
1501                "eng-GBR", // Wrong format
1502                "en_GB",   // Wrong separator
1503                "123-45",  // Invalid characters
1504                "GB-en",   // Wrong order
1505                "en-gb",   // Wrong case
1506            ];
1507
1508            for code in invalid_codes {
1509                assert!(
1510                    !validate_language_code(code),
1511                    "Language code '{}' should be invalid",
1512                    code
1513                );
1514            }
1515        }
1516    }
1517
1518    mod integration_tests {
1519        use super::*;
1520
1521        #[test]
1522        fn test_end_to_end_conversion() -> Result<()> {
1523            let temp_dir = setup_test_dir();
1524            let content = r#"---
1525title: Test Document
1526---
1527
1528# Hello World
1529
1530This is a test document with:
1531- A list
1532- And some **bold** text
1533"#;
1534            let input_path = create_test_file(&temp_dir, content);
1535            let output_path = temp_dir.path().join("test.html");
1536
1537            let config = MarkdownConfig {
1538                html_config: HtmlConfig {
1539                    enable_syntax_highlighting: true,
1540                    generate_toc: true,
1541                    ..Default::default()
1542                },
1543                ..Default::default()
1544            };
1545
1546            markdown_file_to_html(
1547                Some(&input_path),
1548                Some(OutputDestination::File(
1549                    output_path.to_string_lossy().into(),
1550                )),
1551                Some(config),
1552            )?;
1553
1554            let html = std::fs::read_to_string(&output_path)?;
1555            assert!(html.contains("<h1>Hello World</h1>"));
1556            assert!(html.contains("<strong>bold</strong>"));
1557            assert!(html.contains("<ul>"));
1558
1559            Ok(())
1560        }
1561
1562        #[test]
1563        fn test_output_destination_debug() {
1564            assert_eq!(
1565                format!(
1566                    "{:?}",
1567                    OutputDestination::File("test.html".to_string())
1568                ),
1569                r#"File("test.html")"#
1570            );
1571            assert_eq!(
1572                format!("{:?}", OutputDestination::Stdout),
1573                "Stdout"
1574            );
1575
1576            let writer = Box::new(Cursor::new(Vec::new()));
1577            assert_eq!(
1578                format!("{:?}", OutputDestination::Writer(writer)),
1579                "Writer(<dyn Write>)"
1580            );
1581        }
1582    }
1583
1584    mod markdown_config_tests {
1585        use super::*;
1586
1587        #[test]
1588        fn test_markdown_config_custom_encoding() {
1589            let config = MarkdownConfig {
1590                encoding: "latin1".to_string(),
1591                html_config: HtmlConfig::default(),
1592            };
1593            assert_eq!(config.encoding, "latin1");
1594        }
1595
1596        #[test]
1597        fn test_markdown_config_default() {
1598            let config = MarkdownConfig::default();
1599            assert_eq!(config.encoding, "utf-8");
1600            assert_eq!(config.html_config, HtmlConfig::default());
1601        }
1602
1603        #[test]
1604        fn test_markdown_config_clone() {
1605            let config = MarkdownConfig::default();
1606            let cloned = config.clone();
1607            assert_eq!(config, cloned);
1608        }
1609    }
1610
1611    mod config_error_tests {
1612        use super::*;
1613
1614        #[test]
1615        fn test_config_error_display() {
1616            let error = ConfigError::InvalidInputSize(100, 1024);
1617            assert!(error.to_string().contains("Invalid input size"));
1618
1619            let error =
1620                ConfigError::InvalidLanguageCode("xx".to_string());
1621            assert!(error
1622                .to_string()
1623                .contains("Invalid language code"));
1624
1625            let error =
1626                ConfigError::InvalidFilePath("../bad/path".to_string());
1627            assert!(error.to_string().contains("Invalid file path"));
1628        }
1629    }
1630
1631    mod output_destination_tests {
1632        use super::*;
1633
1634        #[test]
1635        fn test_output_destination_default() {
1636            assert!(matches!(
1637                OutputDestination::default(),
1638                OutputDestination::Stdout
1639            ));
1640        }
1641
1642        #[test]
1643        fn test_output_destination_file() {
1644            let dest = OutputDestination::File("test.html".to_string());
1645            assert!(matches!(dest, OutputDestination::File(_)));
1646        }
1647
1648        #[test]
1649        fn test_output_destination_writer() {
1650            let writer = Box::new(Cursor::new(Vec::new()));
1651            let dest = OutputDestination::Writer(writer);
1652            assert!(matches!(dest, OutputDestination::Writer(_)));
1653        }
1654    }
1655
1656    mod html_config_tests {
1657        use super::*;
1658
1659        #[test]
1660        fn test_html_config_builder_all_options() {
1661            let config = HtmlConfig::builder()
1662                .with_syntax_highlighting(
1663                    true,
1664                    Some("dracula".to_string()),
1665                )
1666                .with_language("en-US")
1667                .build()
1668                .unwrap();
1669
1670            assert!(config.enable_syntax_highlighting);
1671            assert_eq!(
1672                config.syntax_theme,
1673                Some("dracula".to_string())
1674            );
1675            assert_eq!(config.language, "en-US");
1676        }
1677
1678        #[test]
1679        fn test_html_config_validation_edge_cases() {
1680            let config = HtmlConfig {
1681                max_input_size: constants::MIN_INPUT_SIZE,
1682                ..Default::default()
1683            };
1684            assert!(config.validate().is_ok());
1685
1686            let config = HtmlConfig {
1687                max_input_size: constants::MIN_INPUT_SIZE - 1,
1688                ..Default::default()
1689            };
1690            assert!(config.validate().is_err());
1691        }
1692    }
1693
1694    mod markdown_processing_tests {
1695        use super::*;
1696
1697        #[test]
1698        fn test_markdown_to_html_with_front_matter() -> Result<()> {
1699            let markdown = r#"---
1700title: Test
1701author: Test Author
1702---
1703# Heading
1704Content"#;
1705            let html = markdown_to_html(markdown, None)?;
1706            assert!(html.contains("<h1>Heading</h1>"));
1707            assert!(html.contains("<p>Content</p>"));
1708            Ok(())
1709        }
1710
1711        #[test]
1712        fn test_markdown_to_html_with_code_blocks() -> Result<()> {
1713            let markdown = r#"```rust
1714fn main() {
1715    println!("Hello");
1716}
1717```"#;
1718            let config = MarkdownConfig {
1719                html_config: HtmlConfig {
1720                    enable_syntax_highlighting: true,
1721                    ..Default::default()
1722                },
1723                ..Default::default()
1724            };
1725            let html = markdown_to_html(markdown, Some(config))?;
1726            assert!(html.contains("language-rust"));
1727            Ok(())
1728        }
1729
1730        #[test]
1731        fn test_markdown_to_html_with_tables() -> Result<()> {
1732            let markdown = r#"
1733| Header 1 | Header 2 |
1734|----------|----------|
1735| Cell 1   | Cell 2   |
1736"#;
1737            let html = markdown_to_html(markdown, None)?;
1738            // First verify the HTML output to see what we're getting
1739            println!("Generated HTML for table: {}", html);
1740            // Check for common table elements - div wrapper is often used for table responsiveness
1741            assert!(html.contains("Header 1"));
1742            assert!(html.contains("Cell 1"));
1743            assert!(html.contains("Cell 2"));
1744            Ok(())
1745        }
1746
1747        #[test]
1748        fn test_invalid_encoding_handling() {
1749            let config = MarkdownConfig {
1750                encoding: "unsupported-encoding".to_string(),
1751                html_config: HtmlConfig::default(),
1752            };
1753            // Simulate usage where encoding matters
1754            let result = markdown_to_html("# Test", Some(config));
1755            assert!(result.is_ok()); // Assuming encoding isn't directly validated during processing
1756        }
1757
1758        #[test]
1759        fn test_config_error_types() {
1760            let error = ConfigError::InvalidInputSize(512, 1024);
1761            assert_eq!(format!("{}", error), "Invalid input size: 512 bytes is below minimum of 1024 bytes");
1762        }
1763    }
1764
1765    mod file_processing_tests {
1766        use crate::constants;
1767        use crate::HtmlConfig;
1768        use crate::{
1769            markdown_file_to_html, HtmlError, OutputDestination,
1770        };
1771        use std::io::Cursor;
1772        use std::path::Path;
1773        use tempfile::NamedTempFile;
1774
1775        #[test]
1776        fn test_display_file() {
1777            let output =
1778                OutputDestination::File("output.html".to_string());
1779            let display = format!("{}", output);
1780            assert_eq!(display, "File(output.html)");
1781        }
1782
1783        #[test]
1784        fn test_display_stdout() {
1785            let output = OutputDestination::Stdout;
1786            let display = format!("{}", output);
1787            assert_eq!(display, "Stdout");
1788        }
1789
1790        #[test]
1791        fn test_display_writer() {
1792            let buffer = Cursor::new(Vec::new());
1793            let output = OutputDestination::Writer(Box::new(buffer));
1794            let display = format!("{}", output);
1795            assert_eq!(display, "Writer(<dyn Write>)");
1796        }
1797
1798        #[test]
1799        fn test_debug_file() {
1800            let output =
1801                OutputDestination::File("output.html".to_string());
1802            let debug = format!("{:?}", output);
1803            assert_eq!(debug, r#"File("output.html")"#);
1804        }
1805
1806        #[test]
1807        fn test_debug_stdout() {
1808            let output = OutputDestination::Stdout;
1809            let debug = format!("{:?}", output);
1810            assert_eq!(debug, "Stdout");
1811        }
1812
1813        #[test]
1814        fn test_debug_writer() {
1815            let buffer = Cursor::new(Vec::new());
1816            let output = OutputDestination::Writer(Box::new(buffer));
1817            let debug = format!("{:?}", output);
1818            assert_eq!(debug, "Writer(<dyn Write>)");
1819        }
1820
1821        #[test]
1822        fn test_file_to_html_invalid_input() {
1823            let result = markdown_file_to_html(
1824                Some(Path::new("nonexistent.md")),
1825                None,
1826                None,
1827            );
1828            assert!(matches!(result, Err(HtmlError::Io(_))));
1829        }
1830
1831        #[test]
1832        fn test_file_to_html_with_invalid_output_path(
1833        ) -> Result<(), HtmlError> {
1834            let input = NamedTempFile::new()?;
1835            std::fs::write(&input, "# Test")?;
1836
1837            let result = markdown_file_to_html(
1838                Some(input.path()),
1839                Some(OutputDestination::File(
1840                    "/invalid/path/test.html".to_string(),
1841                )),
1842                None,
1843            );
1844            assert!(result.is_err());
1845            Ok(())
1846        }
1847
1848        // Test for Default implementation of OutputDestination
1849        #[test]
1850        fn test_output_destination_default() {
1851            let default = OutputDestination::default();
1852            assert!(matches!(default, OutputDestination::Stdout));
1853        }
1854
1855        // Test for Debug implementation of OutputDestination
1856        #[test]
1857        fn test_output_destination_debug() {
1858            let file_debug = format!(
1859                "{:?}",
1860                OutputDestination::File(
1861                    "path/to/file.html".to_string()
1862                )
1863            );
1864            assert_eq!(file_debug, r#"File("path/to/file.html")"#);
1865
1866            let writer_debug = format!(
1867                "{:?}",
1868                OutputDestination::Writer(Box::new(Cursor::new(
1869                    Vec::new()
1870                )))
1871            );
1872            assert_eq!(writer_debug, "Writer(<dyn Write>)");
1873
1874            let stdout_debug =
1875                format!("{:?}", OutputDestination::Stdout);
1876            assert_eq!(stdout_debug, "Stdout");
1877        }
1878
1879        // Test for Display implementation of OutputDestination
1880        #[test]
1881        fn test_output_destination_display() {
1882            let file_display = format!(
1883                "{}",
1884                OutputDestination::File(
1885                    "path/to/file.html".to_string()
1886                )
1887            );
1888            assert_eq!(file_display, "File(path/to/file.html)");
1889
1890            let writer_display = format!(
1891                "{}",
1892                OutputDestination::Writer(Box::new(Cursor::new(
1893                    Vec::new()
1894                )))
1895            );
1896            assert_eq!(writer_display, "Writer(<dyn Write>)");
1897
1898            let stdout_display =
1899                format!("{}", OutputDestination::Stdout);
1900            assert_eq!(stdout_display, "Stdout");
1901        }
1902
/// Confirms the field values produced by `HtmlConfig::default()`,
/// including those compared against the `constants` module.
#[test]
fn test_html_config_default() {
    let config = HtmlConfig::default();

    // Flags expected to be switched on by default.
    assert!(config.enable_syntax_highlighting);
    assert!(config.add_aria_attributes);

    // Flags expected to be switched off by default.
    assert!(!config.minify_output);
    assert!(!config.generate_structured_data);
    assert!(!config.generate_toc);

    // Values expected to mirror the crate-level constants.
    let expected_theme = Some(constants::DEFAULT_SYNTAX_THEME.to_string());
    assert_eq!(config.syntax_theme, expected_theme);

    assert_eq!(config.max_input_size, constants::DEFAULT_MAX_INPUT_SIZE);

    let expected_language = constants::DEFAULT_LANGUAGE.to_string();
    assert_eq!(config.language, expected_language);
}
1925
/// Builds an `HtmlConfig` through `HtmlConfig::builder()` and checks that
/// the syntax-highlighting settings and the language passed to the builder
/// are present on the built value.
#[test]
fn test_html_config_builder() {
    let theme = Some("monokai".to_string());

    let config = HtmlConfig::builder()
        .with_syntax_highlighting(true, theme)
        .with_language("en-US")
        .build()
        .unwrap();

    assert!(config.enable_syntax_highlighting);
    // Compare as `Option<&str>` to avoid allocating an expected `String`.
    assert_eq!(config.syntax_theme.as_deref(), Some("monokai"));
    assert_eq!(config.language, "en-US");
}
1945
/// A path one byte longer than `constants::MAX_PATH_LENGTH` must be
/// rejected with an `InvalidInput` error that mentions the length limit.
#[test]
fn test_long_file_path_validation() {
    let oversized = "a".repeat(constants::MAX_PATH_LENGTH + 1);
    let outcome = HtmlConfig::validate_file_path(oversized);

    // Only an `InvalidInput` carrying the length message counts as a pass.
    let rejected_for_length = match &outcome {
        Err(HtmlError::InvalidInput(msg)) => {
            msg.contains("File path exceeds maximum length")
        }
        _ => false,
    };
    assert!(rejected_for_length);
}
1955
/// Absolute paths are accepted on purpose, because CLI tools call the
/// library with fully qualified filenames. Authorisation is left to the
/// caller; see the [`HtmlConfig::validate_file_path`] docs.
#[test]
fn test_absolute_path_is_accepted() {
    let absolute = "/absolute/path/to/file.md";
    let result = HtmlConfig::validate_file_path(absolute);

    let accepted = result.is_ok();
    assert!(accepted, "absolute paths must be accepted, got {result:?}");
}
1970
/// Paths with an embedded NUL byte must be refused: on Unix, C-string
/// path handling silently truncates at the first NUL, which would allow
/// a different path to be smuggled in.
#[test]
fn test_nul_byte_path_is_rejected() {
    let smuggled = "safe.md\0bad";
    let result = HtmlConfig::validate_file_path(smuggled);

    // Pass only when the error is `InvalidInput` and names the NUL byte.
    let rejected_for_nul = match &result {
        Err(HtmlError::InvalidInput(msg)) => msg.contains("NUL"),
        _ => false,
    };
    assert!(
        rejected_for_nul,
        "NUL byte in path must be rejected, got {result:?}"
    );
}
1981    }
1982
mod language_validation_extended_tests {
    use super::*;

    /// Malformed language codes (wrong length, casing, separator or segment
    /// count) must be rejected by `validate_language_code`.
    #[test]
    fn test_language_code_edge_cases() {
        let rejected = [
            // Empty string
            "",
            // Single character
            "a",
            // Incorrect casing
            "EN-GB",
            "en-gb",
            // Invalid separators
            "en_GB",
            "en GB",
            // Too many segments
            "en-GB-extra",
        ];

        for code in rejected {
            assert!(
                !validate_language_code(code),
                "expected language code {code:?} to be rejected"
            );
        }
    }

    /// Language codes containing digits, punctuation or non-ASCII
    /// characters must be rejected by `validate_language_code`.
    #[test]
    fn test_language_code_special_cases() {
        let rejected = [
            // Digits in either segment
            "e1-GB",
            "en-G1",
            // Special characters
            "en-GB!",
            "en@GB",
            // Unicode characters in either segment
            "あa-GB",
            "en-あa",
        ];

        for code in rejected {
            assert!(
                !validate_language_code(code),
                "expected language code {code:?} to be rejected"
            );
        }
    }
}
2021
mod integration_extended_tests {
    use super::*;

    /// Writes a Markdown fixture to a temporary directory, converts it with
    /// `markdown_file_to_html` (syntax highlighting, TOC, ARIA attributes,
    /// structured data and minification all switched on) and checks the
    /// written HTML for the expected fragments.
    #[test]
    fn test_full_conversion_pipeline() -> Result<()> {
        // Fixture covering front matter, headings, emphasis, nested lists,
        // a fenced code block, a table, a blockquote and a link.
        let content = r#"---
title: Test Document
author: Test Author
---

# Main Heading

## Subheading

This is a paragraph with *italic* and **bold** text.

- List item 1
- List item 2
  - Nested item
  - Another nested item

```rust
fn main() {
    println!("Hello, world!");
}
```

| Column 1 | Column 2 |
|----------|----------|
| Cell 1   | Cell 2   |

> This is a blockquote

[Link text](https://example.com)"#;

        // Input and output both live in the same scratch directory.
        let temp_dir = tempdir()?;
        let input_path = temp_dir.path().join("test.md");
        let output_path = temp_dir.path().join("test.html");
        std::fs::write(&input_path, content)?;

        // Switch on every optional feature exercised by this test.
        let html_config = HtmlConfig {
            enable_syntax_highlighting: true,
            generate_toc: true,
            add_aria_attributes: true,
            generate_structured_data: true,
            minify_output: true,
            ..Default::default()
        };
        let config = MarkdownConfig {
            html_config,
            ..Default::default()
        };

        let destination = OutputDestination::File(
            output_path.to_string_lossy().into_owned(),
        );
        markdown_file_to_html(
            Some(&input_path),
            Some(destination),
            Some(config),
        )?;

        let html = std::fs::read_to_string(&output_path)?;
        println!("Generated HTML: {}", html);

        let expected_fragments = [
            "<h1>",
            "<h2>",
            "<em>",
            "<strong>",
            "<ul>",
            "<li>",
            "language-rust",
            // The table is checked by cell content, not by HTML structure.
            "Column 1",
            "Column 2",
            "Cell 1",
            "Cell 2",
            "<blockquote>",
            "<a href=",
        ];
        for fragment in expected_fragments {
            assert!(
                html.contains(fragment),
                "generated HTML is missing {fragment:?}"
            );
        }

        Ok(())
    }

    /// Converting with syntax highlighting disabled and no syntax theme
    /// must still succeed.
    #[test]
    fn test_missing_html_config_fallback() {
        let html_config = HtmlConfig {
            enable_syntax_highlighting: false,
            syntax_theme: None,
            ..Default::default()
        };
        let config = MarkdownConfig {
            encoding: "utf-8".to_string(),
            html_config,
        };

        let outcome = markdown_to_html("# Test", Some(config));
        assert!(outcome.is_ok());
    }

    /// Converting `test.md` into `/root/forbidden.html` must report an error.
    ///
    /// NOTE(review): `test.md` is a relative path this test never creates,
    /// so the error may come from the input side rather than from the
    /// destination; confirm which failure is intended.
    #[test]
    fn test_invalid_output_destination() {
        let source = Path::new("test.md");
        let destination =
            OutputDestination::File("/root/forbidden.html".to_string());

        let outcome =
            markdown_file_to_html(Some(source), Some(destination), None);
        assert!(outcome.is_err());
    }
}
2136
mod performance_tests {
    use super::*;
    use std::time::Instant;

    /// Converts a Markdown snippet repeated 1000 times, prints the elapsed
    /// time and the input/output sizes, and sanity-checks the output.
    /// No time limit is asserted; the timing is informational only.
    #[test]
    fn test_large_document_performance() -> Result<()> {
        let large_content =
            "# Heading\n\nParagraph\n\n- List item\n\n".repeat(1000);

        // Time only the conversion itself.
        let timer = Instant::now();
        let html = markdown_to_html(&large_content, None)?;
        let elapsed = timer.elapsed();

        // Report metrics for manual inspection.
        println!("Large document conversion took: {:?}", elapsed);
        println!("Input size: {} bytes", large_content.len());
        println!("Output size: {} bytes", html.len());

        // Basic validation: each block type must have been rendered.
        for tag in ["<h1>", "<p>", "<ul>"] {
            assert!(
                html.contains(tag),
                "generated HTML is missing {tag:?}"
            );
        }

        Ok(())
    }
}
2164}