Skip to main content

html_generator/
utils.rs

1// Copyright © 2025 HTML Generator. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! Utility functions for HTML and Markdown processing.
5//!
6//! This module provides various utility functions for tasks such as
7//! extracting front matter from Markdown content and formatting HTML headers.
8
9use crate::error::{HtmlError, Result};
10use crate::seo::escape_html;
11use once_cell::sync::Lazy;
12use regex::Regex;
13use scraper::ElementRef;
14use serde_json::Value;
15use std::collections::HashMap;
16
17static FRONT_MATTER_REGEX: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r"(?ms)^---\s*\n(.*?)\n---\s*\n")
19        .expect("static FRONT_MATTER_REGEX must compile")
20});
21
22static TOML_FRONT_MATTER_REGEX: Lazy<Regex> = Lazy::new(|| {
23    Regex::new(r"(?ms)^\+\+\+\s*\n(.*?)\n\+\+\+\s*\n")
24        .expect("static TOML_FRONT_MATTER_REGEX must compile")
25});
26
27static HEADER_REGEX: Lazy<Regex> = Lazy::new(|| {
28    Regex::new(r"<(h[1-6])(?:\s[^>]*)?>(.+?)</h[1-6]>")
29        .expect("static HEADER_REGEX must compile")
30});
31
/// Largest input, in bytes, that the size-checked helpers in this module
/// accept (1 MB); anything bigger is rejected up front to limit
/// denial-of-service exposure.
const MAX_INPUT_SIZE: usize = 1_000_000;
34
35/// Extracts front matter from Markdown content.
36///
37/// # Arguments
38///
39/// * `content` - A string slice that holds the content to process.
40///
41/// # Returns
42///
43/// * `Result<String>` - The content with front matter removed, or an error.
44///
45/// # Errors
46///
47/// This function will return an error if:
48/// * The input is empty or exceeds the maximum allowed size.
49/// * The front matter is invalidly formatted.
50///
51/// # Examples
52///
53/// ```
54/// use html_generator::utils::extract_front_matter;
55///
56/// let content = "---\ntitle: My Page\n---\n# Hello, world!\n\nThis is a test.";
57/// let result = extract_front_matter(content).unwrap();
58/// assert_eq!(result, "# Hello, world!\n\nThis is a test.");
59/// ```
60pub fn extract_front_matter(content: &str) -> Result<String> {
61    if content.is_empty() {
62        return Err(HtmlError::InvalidInput("Empty input".to_string()));
63    }
64    if content.len() > MAX_INPUT_SIZE {
65        return Err(HtmlError::InputTooLarge(content.len()));
66    }
67
68    if content.starts_with("---") {
69        if let Some(captures) = FRONT_MATTER_REGEX.captures(content) {
70            // Group 1 is the mandatory `(.*?)` — if `captures`
71            // matched, the group is always present.
72            let front_matter = captures
73                .get(1)
74                .expect("front-matter regex group 1 is mandatory")
75                .as_str();
76
77            for line in front_matter.lines() {
78                let trimmed = line.trim();
79                // Skip blank lines and YAML comments
80                if trimmed.is_empty() || trimmed.starts_with('#') {
81                    continue;
82                }
83                if !trimmed.contains(':') {
84                    return Err(HtmlError::InvalidFrontMatterFormat(
85                        format!(
86                            "Invalid line in front matter: {}",
87                            line
88                        ),
89                    ));
90                }
91            }
92
93            let remaining_content =
94                &content[captures.get(0).map_or(0, |m| m.end())..];
95            Ok(remaining_content.trim().to_string())
96        } else {
97            Err(HtmlError::InvalidFrontMatterFormat(
98                "Invalid front matter format".to_string(),
99            ))
100        }
101    } else {
102        Ok(content.to_string())
103    }
104}
105
106/// Extracts and parses front matter from content, supporting YAML (`---`),
107/// TOML (`+++`), and JSON (`{`...`}`) delimiters.
108///
109/// Returns a tuple of (parsed front matter as JSON Value, remaining content).
110/// If no front matter is found, returns (`Value::Null`, original content).
111///
112/// # Arguments
113///
114/// * `content` - A string slice that holds the content to process.
115///
116/// # Returns
117///
118/// * `Result<(Value, String)>` - Parsed front matter and remaining content, or an error.
119///
120/// # Errors
121///
122/// This function will return an error if:
123/// * The input is empty or exceeds the maximum allowed size.
124/// * The front matter is invalidly formatted or cannot be parsed.
125///
126/// # Examples
127///
128/// ```
129/// use html_generator::utils::extract_front_matter_data;
130/// use serde_json::json;
131///
132/// let content = "---\ntitle: My Page\n---\n# Hello, world!";
133/// let (data, rest) = extract_front_matter_data(content).unwrap();
134/// assert_eq!(data["title"], "My Page");
135/// assert_eq!(rest, "# Hello, world!");
136/// ```
137pub fn extract_front_matter_data(
138    content: &str,
139) -> Result<(Value, String)> {
140    if content.is_empty() {
141        return Err(HtmlError::InvalidInput("Empty input".to_string()));
142    }
143    if content.len() > MAX_INPUT_SIZE {
144        return Err(HtmlError::InputTooLarge(content.len()));
145    }
146
147    // YAML front matter (---)
148    if content.starts_with("---") {
149        if let Some(captures) = FRONT_MATTER_REGEX.captures(content) {
150            let raw = captures
151                .get(1)
152                .expect("front-matter regex group 1 is mandatory")
153                .as_str();
154
155            let map = parse_yaml_to_map(raw)?;
156            let remaining =
157                &content[captures.get(0).map_or(0, |m| m.end())..];
158            return Ok((
159                Value::Object(map),
160                remaining.trim().to_string(),
161            ));
162        }
163        return Err(HtmlError::InvalidFrontMatterFormat(
164            "Invalid YAML front matter format".to_string(),
165        ));
166    }
167
168    // TOML front matter (+++)
169    if content.starts_with("+++") {
170        if let Some(captures) =
171            TOML_FRONT_MATTER_REGEX.captures(content)
172        {
173            let raw = captures
174                .get(1)
175                .expect("TOML front-matter regex group 1 is mandatory")
176                .as_str();
177
178            let map = parse_toml_to_map(raw)?;
179            let remaining =
180                &content[captures.get(0).map_or(0, |m| m.end())..];
181            return Ok((
182                Value::Object(map),
183                remaining.trim().to_string(),
184            ));
185        }
186        return Err(HtmlError::InvalidFrontMatterFormat(
187            "Invalid TOML front matter format".to_string(),
188        ));
189    }
190
191    // JSON front matter ({...})
192    if content.starts_with('{') {
193        if let Some(end) = find_matching_brace(content) {
194            let json_str = &content[..=end];
195            let value: Value =
196                serde_json::from_str(json_str).map_err(|e| {
197                    HtmlError::InvalidFrontMatterFormat(format!(
198                        "Invalid JSON front matter: {e}"
199                    ))
200                })?;
201            let remaining = content[end + 1..].trim_start();
202            return Ok((value, remaining.to_string()));
203        }
204        return Err(HtmlError::InvalidFrontMatterFormat(
205            "Unmatched opening brace in JSON front matter".to_string(),
206        ));
207    }
208
209    // No front matter found
210    Ok((Value::Null, content.to_string()))
211}
212
213/// Parses a YAML front matter block into a serde_json Map using
214/// the inlined private `yaml` module (a pure-Rust,
215/// `forbid(unsafe_code)` implementation; see `src/yaml/mod.rs` for
216/// the vendor rationale).
217fn parse_yaml_to_map(
218    raw: &str,
219) -> Result<serde_json::Map<String, Value>> {
220    let value: Value = crate::yaml::from_str(raw).map_err(|e| {
221        HtmlError::InvalidFrontMatterFormat(format!(
222            "Invalid YAML front matter: {e}"
223        ))
224    })?;
225    match value {
226        Value::Object(map) => Ok(map),
227        _ => Err(HtmlError::InvalidFrontMatterFormat(
228            "YAML front matter must be a mapping".to_string(),
229        )),
230    }
231}
232
233/// Parses a TOML front matter block into a serde_json Map using the `toml` crate.
234fn parse_toml_to_map(
235    raw: &str,
236) -> Result<serde_json::Map<String, Value>> {
237    let toml_value: toml::Value = toml::from_str(raw).map_err(|e| {
238        HtmlError::InvalidFrontMatterFormat(format!(
239            "Invalid TOML front matter: {e}"
240        ))
241    })?;
242    // Convert toml::Value -> serde_json::Value via serialization round-trip
243    let json_value: Value =
244        serde_json::to_value(toml_value).map_err(|e| {
245            HtmlError::InvalidFrontMatterFormat(format!(
246                "Failed to convert TOML to JSON: {e}"
247            ))
248        })?;
249    // `toml::from_str::<toml::Value>` only succeeds on a table
250    // document, so the round-trip through serde_json yields an
251    // `Object` every time.
252    match json_value {
253        Value::Object(map) => Ok(map),
254        _ => Err(HtmlError::InvalidFrontMatterFormat(
255            "TOML document must parse as a table".to_string(),
256        )),
257    }
258}
259
/// Finds the byte index of the `}` that closes the first `{` in `content`.
///
/// Braces inside double-quoted strings are ignored, and a backslash inside a
/// string escapes the character that follows it (so `\"` does not end the
/// string).
///
/// Returns `None` when the braces never balance, and also when a `}` shows
/// up before any `{` has been opened — that case previously underflowed the
/// depth counter.
fn find_matching_brace(content: &str) -> Option<usize> {
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;

    for (i, ch) in content.char_indices() {
        if in_string {
            if escaped {
                // Whatever follows a backslash is taken literally.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '{' => depth += 1,
            '}' => {
                // A closer with nothing open can never be "the match".
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
293
294/// Formats a header with an ID and class.
295///
296/// # Arguments
297///
298/// * `header` - A string slice that holds the HTML header to process.
299/// * `id_generator` - An optional function that generates the ID from the header content.
300/// * `class_generator` - An optional function that generates the class from the header content.
301///
302/// # Returns
303///
304/// * `Result<String>` - The formatted HTML header, or an error.
305///
306/// # Examples
307///
308/// ```
309/// use html_generator::utils::format_header_with_id_class;
310///
311/// let header = "<h2>Hello, World!</h2>";
312/// let result = format_header_with_id_class(header, None, None).unwrap();
313/// assert_eq!(result, "<h2 id=\"hello-world\" class=\"hello-world\">Hello, World!</h2>");
314/// ```
315pub fn format_header_with_id_class(
316    header: &str,
317    id_generator: Option<fn(&str) -> String>,
318    class_generator: Option<fn(&str) -> String>,
319) -> Result<String> {
320    let captures = HEADER_REGEX.captures(header).ok_or_else(|| {
321        HtmlError::InvalidHeaderFormat(
322            "Invalid header format".to_string(),
323        )
324    })?;
325
326    // Groups 1 and 2 are both mandatory (`(h[1-6])` and `(.+?)`); if
327    // `captures` returned Some, they are always present.
328    let tag = captures
329        .get(1)
330        .expect("header regex group 1 is mandatory")
331        .as_str();
332
333    let text_content = captures
334        .get(2)
335        .expect("header regex group 2 is mandatory")
336        .as_str();
337
338    let id = id_generator.map_or_else(
339        || generate_id(text_content),
340        |generator| generator(text_content),
341    );
342    let class = class_generator.map_or_else(
343        || generate_id(text_content),
344        |generator| generator(text_content),
345    );
346
347    Ok(format!(
348        r#"<{} id="{}" class="{}">{}</{}>"#,
349        tag, id, class, text_content, tag
350    ))
351}
352
353/// Generates a table of contents from HTML content.
354///
355/// # Arguments
356///
357/// * `html` - A string slice that holds the HTML content to process.
358///
359/// # Returns
360///
361/// * `Result<String>` - The generated table of contents as an HTML string, or an error.
362///
363/// # Examples
364///
365/// ```
366/// use html_generator::utils::generate_table_of_contents;
367///
368/// let html = "<h1>Title</h1><p>Some content</p><h2>Subtitle</h2><p>More content</p>";
369/// let result = generate_table_of_contents(html).unwrap();
370/// assert_eq!(result, r#"<ul><li class="toc-h1"><a href="\#title">Title</a></li><li class="toc-h2"><a href="\#subtitle">Subtitle</a></li></ul>"#);
371/// ```
372pub fn generate_table_of_contents(html: &str) -> Result<String> {
373    if html.is_empty() {
374        return Err(HtmlError::InvalidInput("Empty input".to_string()));
375    }
376    if html.len() > MAX_INPUT_SIZE {
377        return Err(HtmlError::InputTooLarge(html.len()));
378    }
379
380    let mut toc = String::new();
381    toc.push_str("<ul>");
382
383    for captures in HEADER_REGEX.captures_iter(html) {
384        if let Some(tag) = captures.get(1) {
385            let content = captures.get(2).map_or("", |m| m.as_str());
386            let id = generate_id(content);
387            toc.push_str(&format!(
388                r#"<li class="toc-{}"><a href="\#{}">{}</a></li>"#,
389                tag.as_str(),
390                id,
391                escape_html(content)
392            ));
393        }
394    }
395
396    toc.push_str("</ul>");
397    Ok(toc)
398}
399
400/// Check if an ARIA role is valid for a given element.
401///
402/// # Arguments
403///
404/// * `role` - The ARIA role to validate.
405/// * `element` - The HTML element to validate.
406///
407/// # Returns
408///
409/// * `bool` - Whether the role is valid for the element.
410///
411/// # Examples
412///
413/// ```
414/// use html_generator::utils::is_valid_aria_role;
415/// use scraper::{Html, Selector};
416///
417/// let dom = Html::parse_fragment(r#"<a href="/x">link</a>"#);
418/// let sel = Selector::parse("a").unwrap();
419/// let a = dom.select(&sel).next().unwrap();
420/// assert!(is_valid_aria_role("link", &a));
421/// assert!(!is_valid_aria_role("checkbox", &a));
422/// ```
423pub fn is_valid_aria_role(role: &str, element: &ElementRef) -> bool {
424    static VALID_ROLES: Lazy<HashMap<&'static str, Vec<&'static str>>> =
425        Lazy::new(|| {
426            let mut roles = HashMap::new();
427            let _ =
428                roles.insert("a", vec!["link", "button", "menuitem"]);
429            let _ = roles.insert("button", vec!["button"]);
430            let _ =
431                roles.insert("div", vec!["alert", "tooltip", "dialog"]);
432            let _ = roles.insert(
433                "input",
434                vec!["textbox", "radio", "checkbox", "searchbox"],
435            );
436            roles
437        });
438
439    if let Some(valid_roles) = VALID_ROLES.get(element.value().name()) {
440        valid_roles.contains(&role)
441    } else {
442        false
443    }
444}
445
/// Validates the shape of a language code such as `en` or `en-GB`.
///
/// The code is split on `-`. The primary subtag must be two or three
/// lowercase ASCII letters; every following subtag must be one to eight
/// ASCII letters or digits, so empty or punctuated subtags (`en-`,
/// `en--US`, `en-U$`) are rejected. Only the syntax is checked — subtags
/// are not looked up in any registry.
///
/// # Arguments
///
/// * `lang` - The language code to validate.
///
/// # Returns
///
/// * `bool` - Whether the language code is valid.
///
/// # Examples
///
/// ```
/// use html_generator::utils::is_valid_language_code;
///
/// assert!(is_valid_language_code("en"));
/// assert!(is_valid_language_code("en-GB"));
/// assert!(!is_valid_language_code("EN"));
/// assert!(!is_valid_language_code("z"));
/// assert!(!is_valid_language_code("en-"));
/// ```
pub fn is_valid_language_code(lang: &str) -> bool {
    let mut subtags = lang.split('-');

    // `split` yields at least one item, even for an empty string.
    let primary = subtags.next().unwrap_or_default();
    let primary_ok = (2..=3).contains(&primary.len())
        && primary.chars().all(|c| c.is_ascii_lowercase());

    primary_ok
        && subtags.all(|subtag| {
            (1..=8).contains(&subtag.len())
                && subtag.chars().all(|c| c.is_ascii_alphanumeric())
        })
}
473
/// Builds a slug-style identifier from arbitrary text.
///
/// The text is lowercased character by character. Alphanumeric characters
/// are kept; each run of anything else turns into one `-`. The result never
/// starts or ends with a dash, and input without any alphanumeric character
/// produces an empty string.
fn generate_id(content: &str) -> String {
    let mut slug = String::with_capacity(content.len());

    for ch in content.chars().flat_map(char::to_lowercase) {
        if ch.is_alphanumeric() {
            slug.push(ch);
        } else if !slug.is_empty() && !slug.ends_with('-') {
            // First separator after real content; later ones in the same
            // run are dropped, as are separators before any content.
            slug.push('-');
        }
    }

    // Drop a separator left dangling at the end.
    let kept = slug.trim_end_matches('-').len();
    slug.truncate(kept);
    slug
}
496
#[cfg(test)]
mod tests {
    use super::*;
    use scraper::{Html, Selector};

    /// Returns the first element of `dom` matching the CSS selector `css`.
    ///
    /// Panics when the selector does not parse or nothing matches, which in
    /// a test means the fixture itself is wrong.
    fn first_match<'a>(dom: &'a Html, css: &str) -> ElementRef<'a> {
        let selector =
            Selector::parse(css).expect("test selector must parse");
        let found = dom
            .select(&selector)
            .next()
            .expect("fixture must contain a matching element");
        found
    }

    /// Tests for `extract_front_matter` function.
    mod extract_front_matter_tests {
        use super::*;

        #[test]
        fn test_valid_front_matter() {
            let content = "---\ntitle: My Page\n---\n# Hello, world!\n\nThis is a test.";
            let extracted = extract_front_matter(content)
                .expect("valid front matter");
            assert_eq!(extracted, "# Hello, world!\n\nThis is a test.");
        }

        #[test]
        fn test_no_front_matter() {
            let content = "# Hello, world!\n\nThis is a test without front matter.";
            let extracted = extract_front_matter(content)
                .expect("valid no-front-matter input");
            assert_eq!(extracted, content);
        }

        #[test]
        fn test_empty_input() {
            assert!(matches!(
                extract_front_matter(""),
                Err(HtmlError::InvalidInput(_))
            ));
        }

        #[test]
        fn test_exceeding_max_input_size() {
            let content = "a".repeat(MAX_INPUT_SIZE + 1);
            assert!(matches!(
                extract_front_matter(&content),
                Err(HtmlError::InputTooLarge(_))
            ));
        }

        #[test]
        fn test_invalid_front_matter_format() {
            let content =
                "---\ntitle: value\ninvalid_line\n---\nContent";
            assert!(matches!(
                extract_front_matter(content),
                Err(HtmlError::InvalidFrontMatterFormat(_))
            ));
        }

        #[test]
        fn test_valid_front_matter_with_extra_content() {
            let content = "---\ntitle: Page\n---\n\n# Title\n\nContent";
            let extracted = extract_front_matter(content)
                .expect("front matter followed by a blank line");
            assert_eq!(extracted, "# Title\n\nContent");
        }

        #[test]
        fn test_extract_front_matter_with_mid_document_delimiter() {
            let content = "# Title\nContent\n---\nkey: value\n---";
            let extracted = extract_front_matter(content)
                .expect("delimiters in the body are not front matter");
            assert_eq!(extracted, content);
        }
    }

    /// Tests for `format_header_with_id_class` function.
    mod format_header_with_id_class_tests {
        use super::*;

        #[test]
        fn test_valid_header_default_generators() {
            let formatted = format_header_with_id_class(
                "<h2>Hello, World!</h2>",
                None,
                None,
            )
            .expect("valid header");
            assert_eq!(formatted, "<h2 id=\"hello-world\" class=\"hello-world\">Hello, World!</h2>");
        }

        #[test]
        fn test_custom_id_and_class_generators() {
            fn id_gen(content: &str) -> String {
                format!(
                    "custom-{}",
                    content.to_lowercase().replace(' ', "-")
                )
            }
            fn class_gen(_: &str) -> String {
                "custom-class".to_string()
            }
            let formatted = format_header_with_id_class(
                "<h3>Test Header</h3>",
                Some(id_gen),
                Some(class_gen),
            )
            .expect("valid header with custom generators");
            assert_eq!(formatted, "<h3 id=\"custom-test-header\" class=\"custom-class\">Test Header</h3>");
        }

        #[test]
        fn test_invalid_header_format() {
            let result = format_header_with_id_class(
                "<p>Not a header</p>",
                None,
                None,
            );
            assert!(matches!(
                result,
                Err(HtmlError::InvalidHeaderFormat(_))
            ));
        }

        #[test]
        fn test_header_with_nested_tags() {
            let formatted = format_header_with_id_class(
                "<h2><span>Nested Header</span></h2>",
                None,
                None,
            )
            .expect("header with nested markup");
            assert_eq!(
                formatted,
                "<h2 id=\"span-nested-header-span\" class=\"span-nested-header-span\"><span>Nested Header</span></h2>"
            );
        }

        #[test]
        fn test_format_header_with_long_content() {
            let header = format!("<h1>{}</h1>", "a".repeat(300));
            assert!(
                format_header_with_id_class(&header, None, None).is_ok()
            );
        }

        #[test]
        fn test_header_with_special_characters() {
            let formatted = format_header_with_id_class(
                "<h3>Special & Header!</h3>",
                None,
                None,
            )
            .expect("header with punctuation");
            assert_eq!(
                formatted,
                "<h3 id=\"special-header\" class=\"special-header\">Special & Header!</h3>"
            );
        }
    }

    /// Tests for `generate_table_of_contents` function.
    mod generate_table_of_contents_tests {
        use super::*;

        #[test]
        fn test_valid_html_with_headers() {
            let toc = generate_table_of_contents(
                "<h1>Title</h1><h2>Subtitle</h2>",
            )
            .expect("valid headers produce a TOC");
            assert_eq!(
                toc,
                r##"<ul><li class="toc-h1"><a href="#title">Title</a></li><li class="toc-h2"><a href="#subtitle">Subtitle</a></li></ul>"##
            );
        }

        #[test]
        fn test_html_without_headers() {
            let toc =
                generate_table_of_contents("<p>No headers here.</p>")
                    .expect("no headers still yields an empty TOC");
            assert_eq!(toc, "<ul></ul>");
        }

        #[test]
        fn test_empty_html() {
            assert!(matches!(
                generate_table_of_contents(""),
                Err(HtmlError::InvalidInput(_))
            ));
        }

        #[test]
        fn test_large_html_content() {
            let html = "<h1>Header</h1>".repeat(1000);
            assert!(generate_table_of_contents(&html).is_ok());
        }

        #[test]
        fn test_generate_table_of_contents_with_malformed_html() {
            let toc =
                generate_table_of_contents("<h1>Title<h2>Subtitle")
                    .expect("unclosed headers are simply skipped");
            assert_eq!(toc, "<ul></ul>");
        }

        #[test]
        fn test_generate_table_of_contents_with_attributes() {
            let toc = generate_table_of_contents(
                r#"<h1 class="header-class">Header</h1>"#,
            )
            .expect("header with attributes");
            assert_eq!(
                toc,
                r##"<ul><li class="toc-h1"><a href="#header">Header</a></li></ul>"##
            );
        }
    }

    /// Tests for ARIA validation and utilities.
    mod aria_validation_tests {
        use super::*;

        #[test]
        fn test_valid_aria_role_for_button() {
            let dom =
                Html::parse_fragment("<button role='button'></button>");
            let element = first_match(&dom, "button");
            assert!(is_valid_aria_role("button", &element));
        }

        #[test]
        fn test_invalid_aria_role_for_button() {
            let dom =
                Html::parse_fragment("<button role='link'></button>");
            let element = first_match(&dom, "button");
            assert!(!is_valid_aria_role("link", &element));
        }

        #[test]
        fn test_missing_required_aria_properties() {
            let dom =
                Html::parse_fragment(r#"<div role="slider"></div>"#);
            let element = first_match(&dom, "div");
            let missing = crate::accessibility::utils::get_missing_required_aria_properties(&element);
            assert_eq!(
                missing.unwrap(),
                vec![
                    "aria-valuenow".to_string(),
                    "aria-valuemin".to_string(),
                    "aria-valuemax".to_string()
                ]
            );
        }

        #[test]
        fn test_get_missing_required_aria_properties_valid_role() {
            let dom = Html::parse_fragment(
                r#"<div role="slider" aria-valuenow="10" aria-valuemin="0" aria-valuemax="100"></div>"#,
            );
            let element = first_match(&dom, "div");
            let missing = crate::accessibility::utils::get_missing_required_aria_properties(&element);
            assert!(missing.is_none());
        }

        #[test]
        fn test_get_missing_required_aria_properties_unknown_role() {
            let dom =
                Html::parse_fragment(r#"<div role="unknown"></div>"#);
            let element = first_match(&dom, "div");
            let missing = crate::accessibility::utils::get_missing_required_aria_properties(&element);
            assert!(missing.is_none());
        }
    }

    /// Tests for utility functions.
    mod utility_function_tests {
        use super::*;

        #[test]
        fn test_generate_id() {
            assert_eq!(generate_id("Test Header!"), "test-header");
        }

        #[test]
        fn test_generate_id_with_special_characters() {
            assert_eq!(
                generate_id("Header--with??special**chars"),
                "header-with-special-chars"
            );
        }

        #[test]
        fn test_generate_id_with_leading_trailing_whitespace() {
            assert_eq!(generate_id("  Test Header  "), "test-header");
        }

        #[test]
        fn test_generate_id_with_numeric_content() {
            assert_eq!(generate_id("12345"), "12345");
        }

        #[test]
        fn test_is_valid_language_code() {
            assert!(is_valid_language_code("en"));
            assert!(is_valid_language_code("en-US"));
            assert!(!is_valid_language_code("E"));
            assert!(!is_valid_language_code("123"));
        }

        #[test]
        fn test_is_valid_language_code_long_code() {
            assert!(is_valid_language_code("en-US-variant-123"));
        }

        #[test]
        fn test_is_valid_language_code_non_ascii() {
            assert!(!is_valid_language_code("日本語"));
        }

        /// Additional tests for `extract_front_matter` function.
        #[test]
        fn test_extract_front_matter_empty_delimiters() {
            let content = "------\n# Missing proper front matter";
            assert!(matches!(
                extract_front_matter(content),
                Err(HtmlError::InvalidFrontMatterFormat(_))
            ));
        }

        #[test]
        fn test_extract_front_matter_large_content_valid_front_matter()
        {
            let large_content = format!(
                "---\nkey: value\n---\n{}",
                "Content".repeat(5000)
            );
            assert!(extract_front_matter(&large_content).is_ok());
        }

        /// Additional tests for `format_header_with_id_class` function.
        #[test]
        fn test_format_header_with_malformed_html() {
            let result = format_header_with_id_class(
                "<h2 Missing closing>",
                None,
                None,
            );
            assert!(matches!(
                result,
                Err(HtmlError::InvalidHeaderFormat(_))
            ));
        }

        #[test]
        fn test_format_header_with_inline_styles() {
            let formatted = format_header_with_id_class(
                r#"<h2 style="color: red;">Styled Header</h2>"#,
                None,
                None,
            )
            .expect("header with inline style");
            assert_eq!(
                formatted,
                "<h2 id=\"styled-header\" class=\"styled-header\">Styled Header</h2>"
            );
        }

        /// Additional tests for `generate_table_of_contents` function.
        #[test]
        fn test_toc_with_nested_headers() {
            let toc = generate_table_of_contents(
                "<div><h1>Outer</h1><h2>Inner</h2></div>",
            )
            .expect("headers nested in a container");
            assert_eq!(
                toc,
                r##"<ul><li class="toc-h1"><a href="#outer">Outer</a></li><li class="toc-h2"><a href="#inner">Inner</a></li></ul>"##
            );
        }

        #[test]
        fn test_toc_with_malformed_and_valid_headers() {
            let toc = generate_table_of_contents(
                "<h1>Valid</h1><h2 Malformed>",
            )
            .expect("malformed headers are skipped");
            assert_eq!(
                toc,
                r##"<ul><li class="toc-h1"><a href="#valid">Valid</a></li></ul>"##
            );
        }

        /// Additional tests for `is_valid_aria_role` function.
        #[test]
        fn test_unsupported_html_element() {
            let dom = Html::parse_fragment(
                "<unsupported role='custom'></unsupported>",
            );
            let element = first_match(&dom, "unsupported");
            assert!(!is_valid_aria_role("custom", &element));
        }

        /// Additional tests for `is_valid_language_code` function.
        #[test]
        fn test_is_valid_language_code_with_mixed_case() {
            assert!(!is_valid_language_code("eN-uS"));
            assert!(!is_valid_language_code("En#Us"));
        }

        /// Additional tests for `generate_id` function.
        #[test]
        fn test_generate_id_empty_content() {
            assert_eq!(generate_id(""), "");
        }

        #[test]
        fn test_generate_id_whitespace_content() {
            assert_eq!(generate_id("   "), "");
        }

        #[test]
        fn test_generate_id_symbols_only() {
            assert_eq!(generate_id("!@#$%^&*()"), "");
        }
    }
}