Skip to main content

html_generator/
performance.rs

1// Copyright © 2025 HTML Generator. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Performance optimization functionality for HTML processing.
5//!
6//! This module provides optimized utilities for HTML minification and generation,
7//! with both synchronous and asynchronous interfaces. The module focuses on:
8//!
9//! - Efficient HTML minification with configurable options
10//! - Non-blocking asynchronous HTML generation
11//! - Memory-efficient string handling
12//! - Thread-safe operations
13//!
14//! # Performance Characteristics
15//!
16//! - Minification: O(n) time complexity, ~1.5x peak memory usage
17//! - HTML Generation: O(n) time complexity, proportional memory usage
18//! - All operations are thread-safe and support concurrent access
19//!
20//! # Examples
21//!
22//! Basic HTML minification:
23//! ```no_run
24//! # use html_generator::performance::minify_html;
25//! # use std::path::Path;
26//! # fn example() -> Result<(), html_generator::error::HtmlError> {
27//! let path = Path::new("index.html");
28//! let minified = minify_html(path)?;
29//! println!("Minified size: {} bytes", minified.len());
30//! # Ok(())
31//! # }
32//! ```
33
34use crate::{HtmlError, Result};
35use comrak::{markdown_to_html, Options};
36use minify_html::{minify, Cfg};
37use std::{fs, path::Path};
38use tokio::task;
39
/// Largest file, in bytes, that `minify_html` accepts (10 MiB).
pub const MAX_FILE_SIZE: usize = 10 << 20;

/// Input length, in bytes, at which `async_generate_html` copies its input
/// through an explicitly pre-sized buffer (1 KiB).
const INITIAL_HTML_CAPACITY: usize = 1 << 10;
45
46/// Configuration for HTML minification with optimized defaults.
47///
48/// Provides a set of minification options that preserve HTML semantics
49/// while reducing file size. The configuration balances compression
50/// with standards compliance.
51#[derive(Clone)]
52struct MinifyConfig {
53    /// Internal minification configuration from minify-html crate
54    cfg: Cfg,
55}
56
57impl Default for MinifyConfig {
58    fn default() -> Self {
59        let mut cfg = Cfg::new();
60        // Preserve HTML semantics and compatibility
61        cfg.minify_doctype = false;
62        cfg.allow_noncompliant_unquoted_attribute_values = false;
63        cfg.keep_closing_tags = true;
64        cfg.keep_html_and_head_opening_tags = true;
65        cfg.allow_removing_spaces_between_attributes = false;
66        // Enable safe minification for non-structural elements
67        cfg.keep_comments = false;
68        cfg.minify_css = true;
69        cfg.minify_js = true;
70        cfg.remove_bangs = true;
71        cfg.remove_processing_instructions = true;
72
73        Self { cfg }
74    }
75}
76
77impl std::fmt::Debug for MinifyConfig {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        f.debug_struct("MinifyConfig")
80            .field("minify_doctype", &self.cfg.minify_doctype)
81            .field("minify_css", &self.cfg.minify_css)
82            .field("minify_js", &self.cfg.minify_js)
83            .field("keep_comments", &self.cfg.keep_comments)
84            .finish()
85    }
86}
87
88/// Minifies HTML content from a file with optimized performance.
89///
90/// Reads an HTML file and applies efficient minification techniques to reduce
91/// its size while maintaining functionality and standards compliance.
92///
93/// # Arguments
94///
95/// * `file_path` - Path to the HTML file to minify
96///
97/// # Returns
98///
99/// Returns the minified HTML content as a string if successful.
100///
101/// # Errors
102///
103/// Returns [`HtmlError`] if:
104/// - File reading fails
105/// - File size exceeds [`MAX_FILE_SIZE`]
106/// - Content is not valid UTF-8
107/// - Minification process fails
108///
109/// # Examples
110///
111/// ```no_run
112/// # use html_generator::performance::minify_html;
113/// # use std::path::Path;
114/// # fn example() -> Result<(), html_generator::error::HtmlError> {
115/// let path = Path::new("index.html");
116/// let minified = minify_html(path)?;
117/// println!("Minified HTML: {} bytes", minified.len());
118/// # Ok(())
119/// # }
120/// ```
121pub fn minify_html(file_path: &Path) -> Result<String> {
122    let metadata = fs::metadata(file_path).map_err(|e| {
123        HtmlError::MinificationError(format!(
124            "Failed to read file metadata for '{}': {e}",
125            file_path.display()
126        ))
127    })?;
128
129    let file_size = metadata.len() as usize;
130    if file_size > MAX_FILE_SIZE {
131        return Err(HtmlError::MinificationError(format!(
132            "File size {file_size} bytes exceeds maximum of {MAX_FILE_SIZE} bytes"
133        )));
134    }
135
136    let content = fs::read_to_string(file_path).map_err(|e| {
137        if e.to_string().contains("stream did not contain valid UTF-8")
138        {
139            HtmlError::MinificationError(format!(
140                "Invalid UTF-8 in input file '{}': {e}",
141                file_path.display()
142            ))
143        } else {
144            HtmlError::MinificationError(format!(
145                "Failed to read file '{}': {e}",
146                file_path.display()
147            ))
148        }
149    })?;
150
151    let config = MinifyConfig::default();
152    let minified = minify(content.as_bytes(), &config.cfg);
153
154    String::from_utf8(minified).map_err(|e| {
155        HtmlError::MinificationError(format!(
156            "Invalid UTF-8 in minified content: {e}"
157        ))
158    })
159}
160
161/// Asynchronously generates HTML from Markdown content.
162///
163/// Processes Markdown in a separate thread to avoid blocking the async runtime,
164/// optimized for efficient memory usage with larger content.
165///
166/// # Arguments
167///
168/// * `markdown` - Markdown content to convert to HTML
169///
170/// # Returns
171///
172/// Returns the generated HTML content if successful.
173///
174/// # Errors
175///
176/// Returns [`HtmlError`] if:
177/// - Thread spawning fails
178/// - Markdown processing fails
179///
180/// # Examples
181///
182/// ```
183/// # use html_generator::performance::async_generate_html;
184/// #
185/// # #[tokio::main]
186/// # async fn main() -> Result<(), html_generator::error::HtmlError> {
187/// let markdown = "# Hello\n\nThis is a test.";
188/// let html = async_generate_html(markdown).await?;
189/// println!("Generated HTML length: {}", html.len());
190/// # Ok(())
191/// # }
192/// ```
193pub async fn async_generate_html(markdown: &str) -> Result<String> {
194    // Optimize string allocation based on content size
195    let markdown = if markdown.len() < INITIAL_HTML_CAPACITY {
196        markdown.to_string()
197    } else {
198        // Pre-allocate for larger content
199        let mut string = String::with_capacity(markdown.len());
200        string.push_str(markdown);
201        string
202    };
203
204    task::spawn_blocking(move || {
205        let options = Options::default();
206        Ok(markdown_to_html(&markdown, &options))
207    })
208    .await
209    .map_err(|e| HtmlError::MarkdownConversion {
210        message: format!("Asynchronous HTML generation failed: {e}"),
211        source: Some(std::io::Error::new(
212            std::io::ErrorKind::Other,
213            e.to_string(),
214        )),
215    })?
216}
217
218/// Synchronously generates HTML from Markdown content.
219///
220/// Provides a simple, synchronous interface for Markdown to HTML conversion
221/// when asynchronous processing isn't required.
222///
223/// # Arguments
224///
225/// * `markdown` - Markdown content to convert to HTML
226///
227/// # Returns
228///
229/// Returns the generated HTML content if successful.
230///
231/// # Examples
232///
233/// ```
234/// # use html_generator::performance::generate_html;
235/// # fn example() -> Result<(), html_generator::error::HtmlError> {
236/// let markdown = "# Hello\n\nThis is a test.";
237/// let html = generate_html(markdown)?;
238/// println!("Generated HTML length: {}", html.len());
239/// # Ok(())
240/// # }
241/// ```
242#[inline]
243pub fn generate_html(markdown: &str) -> Result<String> {
244    Ok(markdown_to_html(markdown, &Options::default()))
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250    use std::fs::File;
251    use std::io::Write;
252    use tempfile::tempdir;
253
254    /// Helper function to create a temporary HTML file for testing.
255    ///
256    /// # Arguments
257    ///
258    /// * `content` - HTML content to write to the file.
259    ///
260    /// # Returns
261    ///
262    /// A tuple containing the temporary directory and file path.
263    fn create_test_file(
264        content: &str,
265    ) -> (tempfile::TempDir, std::path::PathBuf) {
266        let dir = tempdir().expect("Failed to create temp directory");
267        let file_path = dir.path().join("test.html");
268        let mut file = File::create(&file_path)
269            .expect("Failed to create test file");
270        file.write_all(content.as_bytes())
271            .expect("Failed to write test content");
272        (dir, file_path)
273    }
274
275    mod minify_html_tests {
276        use super::*;
277
278        #[test]
279        fn test_minify_basic_html() {
280            let html =
281                "<html>  <body>    <p>Test</p>  </body>  </html>";
282            let (dir, file_path) = create_test_file(html);
283            let result = minify_html(&file_path);
284            assert!(result.is_ok());
285            assert_eq!(
286                result.unwrap(),
287                "<html><body><p>Test</p></body></html>"
288            );
289            drop(dir);
290        }
291
292        #[test]
293        fn test_minify_with_comments() {
294            let html =
295                "<html><!-- Comment --><body><p>Test</p></body></html>";
296            let (dir, file_path) = create_test_file(html);
297            let result = minify_html(&file_path);
298            assert!(result.is_ok());
299            assert_eq!(
300                result.unwrap(),
301                "<html><body><p>Test</p></body></html>"
302            );
303            drop(dir);
304        }
305
306        #[test]
307        fn test_minify_invalid_path() {
308            let result = minify_html(Path::new("nonexistent.html"));
309            assert!(result.is_err());
310            assert!(matches!(
311                result,
312                Err(HtmlError::MinificationError(_))
313            ));
314        }
315
316        #[test]
317        fn test_minify_exceeds_max_size() {
318            let large_content = "a".repeat(MAX_FILE_SIZE + 1);
319            let (dir, file_path) = create_test_file(&large_content);
320            let result = minify_html(&file_path);
321            assert!(matches!(
322                result,
323                Err(HtmlError::MinificationError(_))
324            ));
325            let err_msg = result.unwrap_err().to_string();
326            assert!(err_msg.contains("exceeds maximum"));
327            drop(dir);
328        }
329
330        #[test]
331        fn test_minify_invalid_utf8() {
332            let dir =
333                tempdir().expect("Failed to create temp directory");
334            let file_path = dir.path().join("invalid.html");
335            {
336                let mut file = File::create(&file_path)
337                    .expect("Failed to create test file");
338                file.write_all(&[0xFF, 0xFF])
339                    .expect("Failed to write test content");
340            }
341
342            let result = minify_html(&file_path);
343            assert!(matches!(
344                result,
345                Err(HtmlError::MinificationError(_))
346            ));
347            let err_msg = result.unwrap_err().to_string();
348            assert!(err_msg.contains("Invalid UTF-8 in input file"));
349            drop(dir);
350        }
351
352        #[test]
353        fn test_minify_utf8_content() {
354            let html = "<html><body><p>Test 你好 🦀</p></body></html>";
355            let (dir, file_path) = create_test_file(html);
356            let result = minify_html(&file_path);
357            assert!(result.is_ok());
358            assert_eq!(
359                result.unwrap(),
360                "<html><body><p>Test 你好 🦀</p></body></html>"
361            );
362            drop(dir);
363        }
364    }
365
366    mod async_generate_html_tests {
367        use super::*;
368
369        #[tokio::test]
370        async fn test_async_generate_html() {
371            let markdown = "# Test\n\nThis is a test.";
372            let result = async_generate_html(markdown).await;
373            assert!(result.is_ok());
374            let html = result.unwrap();
375            assert!(html.contains("<h1>Test</h1>"));
376            assert!(html.contains("<p>This is a test.</p>"));
377        }
378
379        #[tokio::test]
380        async fn test_async_generate_html_empty() {
381            let result = async_generate_html("").await;
382            assert!(result.is_ok());
383            assert!(result.unwrap().is_empty());
384        }
385
386        #[tokio::test]
387        async fn test_async_generate_html_large_content() {
388            let large_markdown =
389                "# Test\n\n".to_string() + &"Content\n".repeat(10_000);
390            let result = async_generate_html(&large_markdown).await;
391            assert!(result.is_ok());
392            let html = result.unwrap();
393            assert!(html.contains("<h1>Test</h1>"));
394        }
395    }
396
397    mod generate_html_tests {
398        use super::*;
399
400        #[test]
401        fn test_sync_generate_html() {
402            let markdown = "# Test\n\nThis is a test.";
403            let result = generate_html(markdown);
404            assert!(result.is_ok());
405            let html = result.unwrap();
406            assert!(html.contains("<h1>Test</h1>"));
407            assert!(html.contains("<p>This is a test.</p>"));
408        }
409
410        #[test]
411        fn test_sync_generate_html_empty() {
412            let result = generate_html("");
413            assert!(result.is_ok());
414            assert!(result.unwrap().is_empty());
415        }
416
417        #[test]
418        fn test_sync_generate_html_large_content() {
419            let large_markdown =
420                "# Test\n\n".to_string() + &"Content\n".repeat(10_000);
421            let result = generate_html(&large_markdown);
422            assert!(result.is_ok());
423            let html = result.unwrap();
424            assert!(html.contains("<h1>Test</h1>"));
425        }
426    }
427
428    mod additional_tests {
429        use super::*;
430        use std::fs::File;
431        use std::io::Write;
432        use tempfile::tempdir;
433
434        /// Test for default MinifyConfig values.
435        #[test]
436        fn test_minify_config_default() {
437            let config = MinifyConfig::default();
438            assert!(!config.cfg.minify_doctype);
439            assert!(config.cfg.minify_css);
440            assert!(config.cfg.minify_js);
441            assert!(!config.cfg.keep_comments);
442        }
443
444        /// Test for custom MinifyConfig values.
445        #[test]
446        fn test_minify_config_custom() {
447            let mut config = MinifyConfig::default();
448            config.cfg.keep_comments = true;
449            assert!(config.cfg.keep_comments);
450        }
451
452        /// Test for uncommon HTML structures in minify_html.
453        #[test]
454        fn test_minify_html_uncommon_structures() {
455            let html = r#"<div><span>Test<div><p>Nested</p></div></span></div>"#;
456            let (dir, file_path) = create_test_file(html);
457            let result = minify_html(&file_path);
458            assert!(result.is_ok());
459            assert_eq!(
460                result.unwrap(),
461                r#"<div><span>Test<div><p>Nested</p></div></span></div>"#
462            );
463            drop(dir);
464        }
465
466        /// Test for mixed encodings in minify_html.
467        #[test]
468        fn test_minify_html_mixed_encodings() {
469            let dir =
470                tempdir().expect("Failed to create temp directory");
471            let file_path = dir.path().join("mixed_encoding.html");
472            {
473                let mut file = File::create(&file_path)
474                    .expect("Failed to create test file");
475                file.write_all(&[0xFF, b'T', b'e', b's', b't', 0xFE])
476                    .expect("Failed to write test content");
477            }
478            let result = minify_html(&file_path);
479            assert!(matches!(
480                result,
481                Err(HtmlError::MinificationError(_))
482            ));
483            drop(dir);
484        }
485
486        /// Test for extremely large Markdown content in async_generate_html.
487        #[tokio::test]
488        async fn test_async_generate_html_extremely_large() {
489            let large_markdown = "# Large Content
490"
491            .to_string()
492                + &"Content
493"
494                .repeat(100_000);
495            let result = async_generate_html(&large_markdown).await;
496            assert!(result.is_ok());
497            let html = result.unwrap();
498            assert!(html.contains("<h1>Large Content</h1>"));
499        }
500
501        /// Test for very small Markdown content in generate_html.
502        #[test]
503        fn test_generate_html_very_small() {
504            let markdown = "A";
505            let result = generate_html(markdown);
506            assert!(result.is_ok());
507            assert_eq!(
508                result.unwrap(),
509                "<p>A</p>
510"
511            );
512        }
513
514        #[tokio::test]
515        async fn test_async_generate_html_spawn_blocking_failure() {
516            use tokio::task;
517
518            // Simulate failure by forcing a panic inside the `spawn_blocking` task
519            let _markdown = "# Valid Markdown"; // Normally valid Markdown
520
521            // Override the `spawn_blocking` behavior to simulate a failure
522            let result = task::spawn_blocking(|| {
523                panic!("Simulated task failure"); // Force the closure to fail
524            })
525            .await;
526
527            // Explicitly use `std::result::Result` to avoid alias conflicts
528            let converted_result: std::result::Result<
529                String,
530                HtmlError,
531            > = match result {
532                Err(e) => Err(HtmlError::MarkdownConversion {
533                    message: format!(
534                        "Asynchronous HTML generation failed: {e}"
535                    ),
536                    source: Some(std::io::Error::new(
537                        std::io::ErrorKind::Other,
538                        e.to_string(),
539                    )),
540                }),
541                Ok(_) => panic!("Expected a simulated failure"),
542            };
543
544            // Check that the error matches `HtmlError::MarkdownConversion`
545            assert!(matches!(
546                converted_result,
547                Err(HtmlError::MarkdownConversion { .. })
548            ));
549
550            if let Err(HtmlError::MarkdownConversion {
551                message,
552                source,
553            }) = converted_result
554            {
555                assert!(message
556                    .contains("Asynchronous HTML generation failed"));
557                assert!(source.is_some());
558
559                // Relax the assertion to match the general pattern of the panic message
560                let source_message = source.unwrap().to_string();
561                assert!(
562                    source_message.contains("Simulated task failure"),
563                    "Unexpected source message: {source_message}"
564                );
565            }
566        }
567
568        #[test]
569        fn test_minify_html_empty_content() {
570            let html = "";
571            let (dir, file_path) = create_test_file(html);
572            let result = minify_html(&file_path);
573            assert!(result.is_ok());
574            assert!(
575                result.unwrap().is_empty(),
576                "Minified content should be empty"
577            );
578            drop(dir);
579        }
580
581        #[test]
582        fn test_minify_html_unusual_whitespace() {
583            let html =
584                "<html>\n\n\t<body>\t<p>Test</p>\n\n</body>\n\n</html>";
585            let (dir, file_path) = create_test_file(html);
586            let result = minify_html(&file_path);
587            assert!(result.is_ok());
588            assert_eq!(
589                result.unwrap(),
590                "<html><body><p>Test</p></body></html>",
591                "Unexpected minified result for unusual whitespace"
592            );
593            drop(dir);
594        }
595
596        #[test]
597        fn test_minify_html_with_special_characters() {
598            let html = "<div>&lt;Special&gt; &amp; Characters</div>";
599            let (dir, file_path) = create_test_file(html);
600            let result = minify_html(&file_path);
601            assert!(result.is_ok());
602            assert_eq!(
603        result.unwrap(),
604        "<div>&lt;Special> & Characters</div>",
605        "Special characters were unexpectedly modified during minification"
606    );
607            drop(dir);
608        }
609
610        #[tokio::test]
611        async fn test_async_generate_html_with_special_characters() {
612            let markdown =
613                "# Special & Characters\n\nContent with < > & \" '";
614            let result = async_generate_html(markdown).await;
615            assert!(result.is_ok());
616            let html = result.unwrap();
617            assert!(
618                html.contains("&lt;"),
619                "Less than sign not escaped"
620            );
621            assert!(
622                html.contains("&gt;"),
623                "Greater than sign not escaped"
624            );
625            assert!(html.contains("&amp;"), "Ampersand not escaped");
626            assert!(
627                html.contains("&quot;"),
628                "Double quote not escaped"
629            );
630            assert!(
631                html.contains("&#39;") || html.contains("'"),
632                "Single quote not handled as expected"
633            );
634        }
635    }
636}