rumdl 0.1.51 - Docs.rs

//! Pure kramdown anchor generation with security and performance hardening
//!
//! This module implements the exact anchor generation algorithm used by pure
//! kramdown (without GFM input), verified against the official kramdown Ruby gem,
//! with additional security hardening and performance optimizations.
//!
//! Algorithm verified against kramdown 2.5.1 Ruby gem:
//! 1. Input validation and size limits
//! 2. Unicode normalization and security filtering
//! 3. Character filtering (ASCII letters, numbers, spaces, hyphens only)
//! 4. Symbol replacements (arrows, ampersands with spaces)
//! 5. Leading character removal until first letter
//! 6. Space → hyphen, case conversion
//! 7. Leading hyphen removal, preserve trailing
//!
//! Key differences from Jekyll/GFM:
//! - Removes underscores entirely
//! - More aggressive character filtering
//! - Preserves all hyphens (no consolidation)
//!
//! Security features:
//! - Unicode normalization (NFC)
//! - Control character filtering
//! - Zero-width character removal
//! - RTL/bidirectional character filtering
//! - Input size limits (prevents DoS)
//! - Performance bounds on consecutive patterns

use regex::Regex;
use std::sync::LazyLock;

// HTML/JSX tag stripping - strip entire HTML tags from heading text before anchor generation
static HTML_TAG_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)</?[a-z][^>]*>").unwrap());

// Code span pattern for extracting content from backtick-delimited spans
static CODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`+([^`]*?)`+").unwrap());

/// Generate pure kramdown style anchor fragment from heading text
///
/// This implementation matches pure kramdown's exact behavior (without GFM input),
/// verified against the official kramdown 2.5.1 Ruby gem, with comprehensive security hardening.
///
/// # Critical Fixes Implemented
/// - **Arrow processing bug**: `test-->more` now correctly becomes `test--more` instead of `test--more`
/// - **Unicode boundary safety**: NFC normalization with safe character boundary handling
/// - **Symbol replacement order**: Fixed conflicts with mixed arrow patterns
/// - **Performance protection**: Input size limits and consecutive character bombs
/// - **Security hardening**: Filters dangerous Unicode (RTL, zero-width, control chars)
/// - **Memory safety**: Bounded processing for pathological inputs
/// - **Degenerate inputs**: Empty/number-only inputs correctly return "section"
///
/// # Security Features
/// - Input size limits (max 10KB) with Unicode-safe truncation
/// - Unicode normalization (NFC) with dangerous character filtering
/// - Control character filtering (C0/C1 ranges, non-characters)
/// - Zero-width character removal (ZWS, ZWNJ, word joiner, etc.)
/// - RTL/bidirectional override character filtering
/// - Private use area character filtering
/// - Consecutive character bomb protection (max 50 repetitions)
/// - Performance bounds on all operations (sub-second completion)
///
/// # Examples
/// ```
/// use rumdl_lib::utils::anchor_styles::kramdown;
///
/// // Basic cases
/// assert_eq!(kramdown::heading_to_fragment("Hello World"), "hello-world");
/// assert_eq!(kramdown::heading_to_fragment("test_with_underscores"), "testwithunderscores");
/// assert_eq!(kramdown::heading_to_fragment("Test--Handling"), "test--handling");
///
/// // Fixed arrow processing
/// assert_eq!(kramdown::heading_to_fragment("test-->more"), "test--more");
/// assert_eq!(kramdown::heading_to_fragment("test->more"), "test-more");
/// assert_eq!(kramdown::heading_to_fragment("test > more"), "test--more");
///
/// // Unicode security
/// assert_eq!(kramdown::heading_to_fragment("café"), "caf");
/// assert_eq!(kramdown::heading_to_fragment("safe\u{202E}attack"), "safeattack");
///
/// // Edge cases
/// assert_eq!(kramdown::heading_to_fragment("123"), "section");
/// assert_eq!(kramdown::heading_to_fragment(""), "");
/// ```
pub fn heading_to_fragment(heading: &str) -> String {
    // Input validation
    if heading.is_empty() {
        return String::new(); // Return empty string for empty input (kramdown behavior)
    }

    // Security: Limit input size to prevent DoS attacks
    const MAX_INPUT_SIZE: usize = 10 * 1024; // 10KB
    if heading.len() > MAX_INPUT_SIZE {
        // Find a safe truncation point that doesn't split UTF-8 characters
        let mut truncate_pos = MAX_INPUT_SIZE;
        while truncate_pos > 0 && !heading.is_char_boundary(truncate_pos) {
            truncate_pos -= 1;
        }

        if truncate_pos == 0 {
            // Fallback: use char_indices to find a valid boundary
            truncate_pos = heading
                .char_indices()
                .take_while(|(i, _)| *i < MAX_INPUT_SIZE)
                .last()
                .map(|(i, c)| i + c.len_utf8())
                .unwrap_or(0);
        }

        return heading_to_fragment(&heading[..truncate_pos]);
    }

    let text = heading.trim();
    if text.is_empty() {
        return "section".to_string();
    }

    // Step 1: Unicode normalization and security filtering
    let normalized = normalize_and_filter_unicode(text);
    if normalized.is_empty() {
        return "section".to_string();
    }

    // Step 1.5: Extract code span content to protect from HTML tag stripping,
    // then strip HTML/JSX tags, then restore code span content
    let mut code_extracts: Vec<String> = Vec::new();
    let normalized = CODE_PATTERN
        .replace_all(&normalized, |caps: &regex::Captures| {
            let idx = code_extracts.len();
            let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
            code_extracts.push(content.to_string());
            format!("\x00CODE{idx}\x00")
        })
        .to_string();
    let normalized = HTML_TAG_PATTERN.replace_all(&normalized, "").to_string();
    let mut normalized = normalized;
    for (idx, content) in code_extracts.into_iter().enumerate() {
        normalized = normalized.replace(&format!("\x00CODE{idx}\x00"), &content);
    }

    // Step 2: Character filtering - more aggressive than Jekyll
    // Only ASCII letters, numbers, spaces, hyphens allowed
    // Temporarily allow '>' and '&' for symbol processing
    let mut filtered = String::new();
    for c in normalized.chars() {
        if c.is_ascii_alphabetic() || c.is_ascii_digit() || c == ' ' || c == '-' || c == '>' || c == '&' {
            filtered.push(c);
        }
        // Note: Underscores and Unicode letters are removed (key difference from Jekyll)
    }

    // Step 3: Symbol replacements (kramdown replaces arrows WITH surrounding spaces ONLY)
    // Process in order of specificity to avoid conflicts
    filtered = filtered.replace(" --> ", "----"); // Arrow with spaces → 4 hyphens
    filtered = filtered.replace(" -> ", "---"); // Arrow with spaces → 3 hyphens
    filtered = filtered.replace(" & ", "--"); // Ampersand with spaces → 2 hyphens
    filtered = filtered.replace(" > ", "--"); // Greater than with spaces → 2 hyphens

    // CRITICAL FIX: Transform arrows without spaces to preserve their meaning
    // "test-->more" should become "test->more", not be removed
    filtered = transform_unspaced_arrows(&filtered);

    // Step 4: Find first letter for trimming start
    let mut start_pos = 0;
    let mut found_letter = false;
    for (i, c) in filtered.char_indices() {
        if c.is_ascii_alphabetic() {
            start_pos = i;
            found_letter = true;
            break;
        }
    }

    // If no letters found, return "section" for numbers-only content
    if !found_letter {
        return "section".to_string();
    }

    let trimmed = &filtered[start_pos..];

    // Step 5: Convert spaces to hyphens and lowercase (kramdown does convert spaces!)
    let mut result = String::new();
    for c in trimmed.chars() {
        if c.is_ascii_alphabetic() {
            result.push(c.to_ascii_lowercase());
        } else if c.is_ascii_digit() {
            result.push(c);
        } else {
            // Spaces and existing hyphens become hyphens (kramdown preserves all hyphens)
            result.push('-');
        }
    }

    // Step 6: Remove leading hyphens only (preserve trailing)
    let result = result.trim_start_matches('-').to_string();

    if result.is_empty() {
        "section".to_string()
    } else {
        result
    }
}

/// Normalize Unicode text and filter dangerous characters
///
/// This function applies Unicode NFC normalization and removes:
/// - Control characters (C0 and C1 ranges)
/// - Zero-width characters
/// - RTL/bidirectional override characters
/// - Private use area characters
/// - Non-characters
fn normalize_and_filter_unicode(text: &str) -> String {
    use unicode_normalization::UnicodeNormalization;

    let mut result = String::new();

    // Apply NFC normalization and filter dangerous characters
    for c in text.nfc() {
        // Filter dangerous Unicode categories
        if is_safe_unicode_char(c) {
            result.push(c);
        }
        // Silently drop dangerous characters
    }

    // Additional security: limit consecutive identical characters to prevent bombs
    limit_consecutive_chars(&result)
}

/// Check if a Unicode character is safe for processing
fn is_safe_unicode_char(c: char) -> bool {
    let code = c as u32;

    // Allow basic printable ASCII
    if (0x20..=0x7E).contains(&code) {
        return true;
    }

    // Allow extended ASCII printable
    if (0xA0..=0xFF).contains(&code) {
        return true;
    }

    // Block dangerous ranges
    if is_control_character(code) {
        return false;
    }

    if is_zero_width_character(code) {
        return false;
    }

    if is_bidi_character(code) {
        return false;
    }

    if is_private_use_character(code) {
        return false;
    }

    // Allow other Unicode letters and marks (they'll be filtered later in ASCII filtering)
    true
}

/// Check if character is a control character
fn is_control_character(code: u32) -> bool {
    // C0 controls (0x00-0x1F) except whitespace
    (0x00..=0x1F).contains(&code) && ![0x09, 0x0A, 0x0D].contains(&code)
        // C1 controls (0x80-0x9F)
        || (0x80..=0x9F).contains(&code)
        // DEL character
        || code == 0x7F
        // Line/Paragraph separators
        || [0x2028, 0x2029].contains(&code)
}

/// Check if character is zero-width or invisible
fn is_zero_width_character(code: u32) -> bool {
    [
        0x200B, // Zero Width Space
        0x200C, // Zero Width Non-Joiner
        0x200D, // Zero Width Joiner
        0x2060, // Word Joiner
        0xFEFF, // Zero Width No-Break Space / BOM
        0x061C, // Arabic Letter Mark
        0x034F, // Combining Grapheme Joiner
    ]
    .contains(&code)
}

/// Check if character is bidirectional override or embedding
fn is_bidi_character(code: u32) -> bool {
    // RTL/LTR override and embedding controls
    (0x202A..=0x202E).contains(&code)
        // Isolate controls
        || (0x2066..=0x2069).contains(&code)
}

/// Check if character is in private use areas
fn is_private_use_character(code: u32) -> bool {
    // Basic Multilingual Plane private use
    (0xE000..=0xF8FF).contains(&code)
        // Supplementary private use areas
        || (0xF0000..=0xFFFFD).contains(&code)
        || (0x100000..=0x10FFFD).contains(&code)
        // Non-characters
        || [0xFFFE, 0xFFFF].contains(&code)
        || code == 0xFFFD // Replacement character
}

/// Limit consecutive identical characters to prevent bombs
fn limit_consecutive_chars(text: &str) -> String {
    const MAX_CONSECUTIVE: usize = 50;

    let mut result = String::new();
    let mut last_char = None;
    let mut consecutive_count = 0;

    for c in text.chars() {
        if last_char == Some(c) {
            consecutive_count += 1;
            if consecutive_count >= MAX_CONSECUTIVE {
                continue; // Skip this character
            }
        } else {
            consecutive_count = 1;
        }

        result.push(c);
        last_char = Some(c);
    }

    result
}

/// Transform unspaced arrows and remove remaining symbols
///
/// This fixes the critical bug where "test-->more" became "test--more"
/// by correctly transforming arrows without spaces and removing other symbols.
///
/// Rules:
/// - "-->" (without spaces) becomes "->"
/// - "->" (without spaces) becomes "-"
/// - ">" (standalone) gets removed
/// - "&" (standalone) gets removed
fn transform_unspaced_arrows(text: &str) -> String {
    let mut result = String::new();
    let chars: Vec<char> = text.chars().collect();
    let mut i = 0;

    while i < chars.len() {
        let c = chars[i];

        if c == '-' && i + 2 < chars.len() && chars[i + 1] == '-' && chars[i + 2] == '>' {
            // Found "-->" pattern - transform to "->"
            result.push('-');
            result.push('>');
            i += 3; // Skip the "-->" sequence
        } else if c == '-' && i + 1 < chars.len() && chars[i + 1] == '>' {
            // Found "->" pattern - transform to "-"
            result.push('-');
            i += 2; // Skip the "->" sequence
        } else if c == '>' {
            // Standalone '>' - remove it (don't push)
            i += 1;
        } else if c == '&' {
            // Standalone '&' - remove it (don't push)
            i += 1;
        } else {
            // Normal character - keep it
            result.push(c);
            i += 1;
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_kramdown_basic_cases() {
        assert_eq!(heading_to_fragment("Hello World"), "hello-world");
        assert_eq!(heading_to_fragment("Test Case"), "test-case");
        assert_eq!(heading_to_fragment(""), "");
    }

    #[test]
    fn test_kramdown_underscores_removed() {
        // Pure kramdown removes underscores entirely (key difference from Jekyll)
        assert_eq!(heading_to_fragment("test_with_underscores"), "testwithunderscores");
        assert_eq!(heading_to_fragment("Update login_type"), "update-logintype"); // Space becomes hyphen
        assert_eq!(heading_to_fragment("__dunder__"), "dunder");
    }

    #[test]
    fn test_kramdown_character_filtering() {
        // Pure kramdown is aggressive about character removal
        assert_eq!(heading_to_fragment("API::Response"), "apiresponse");
        assert_eq!(heading_to_fragment("Café René"), "caf-ren"); // Accented chars removed, space becomes hyphen
        assert_eq!(heading_to_fragment("über uns"), "ber-uns"); // Umlaut removed, space becomes hyphen
        assert_eq!(heading_to_fragment("naïve"), "nave"); // Diacritic removed
    }

    #[test]
    fn test_kramdown_hyphen_preservation() {
        // Kramdown preserves ALL hyphens (no consolidation like GitHub)
        assert_eq!(heading_to_fragment("Test-Hyphen"), "test-hyphen"); // 1→1
        assert_eq!(heading_to_fragment("Test--Handling"), "test--handling"); // 2→2 (preserved)
        assert_eq!(heading_to_fragment("Test---Multiple"), "test---multiple"); // 3→3 (preserved)
        assert_eq!(heading_to_fragment("Test----Four"), "test----four"); // 4→4 (preserved)
        assert_eq!(heading_to_fragment("Test-----Five"), "test-----five"); // 5→5 (preserved)
        assert_eq!(heading_to_fragment("Test------Six"), "test------six"); // 6→6 (preserved)
    }

    #[test]
    fn test_kramdown_arrows_issue_39() {
        // Issue #39 cases with pure kramdown behavior
        assert_eq!(
            heading_to_fragment("cbrown --> sbrown: --unsafe-paths"),
            "cbrown----sbrown---unsafe-paths"
        );
        assert_eq!(heading_to_fragment("cbrown -> sbrown"), "cbrown---sbrown");
        assert_eq!(heading_to_fragment("respect_gitignore"), "respectgitignore");

        // CRITICAL FIX: Arrow processing bug - arrows without spaces should be transformed correctly
        assert_eq!(heading_to_fragment("test-->more"), "test--more"); // Fixed: "-->" becomes "--"
        assert_eq!(heading_to_fragment("test->more"), "test-more"); // Fixed: "->" becomes "-"
        assert_eq!(heading_to_fragment("test>more"), "testmore"); // Standalone > removed
        assert_eq!(heading_to_fragment("a->b->c"), "a-b-c"); // Multiple arrows
        assert_eq!(heading_to_fragment("cmd-->output"), "cmd--output"); // Long arrows transformed
    }

    #[test]
    fn test_kramdown_html_jsx_tag_stripping() {
        // HTML/JSX tags should be stripped from headings for anchor generation
        assert_eq!(heading_to_fragment("retentionPolicy<Component />"), "retentionpolicy");
        assert_eq!(heading_to_fragment("Test <span>extra</span>"), "test-extra");
        assert_eq!(heading_to_fragment("Generic<T>"), "generic");
        // Code span content with angle brackets should be preserved
        assert_eq!(heading_to_fragment("`import <FILE>`"), "import-file");
    }

    #[test]
    fn test_kramdown_symbol_replacements() {
        assert_eq!(heading_to_fragment("Testing & Coverage"), "testing--coverage");
        assert_eq!(heading_to_fragment("Compare > Results"), "compare--results");
        assert_eq!(heading_to_fragment("Arrow --> Test"), "arrow----test"); // --> with spaces becomes 4 hyphens
    }

    #[test]
    fn test_kramdown_leading_trimming() {
        // Kramdown removes leading hyphens but preserves trailing
        assert_eq!(heading_to_fragment("---leading"), "leading");
        assert_eq!(heading_to_fragment("trailing---"), "trailing---"); // Trailing preserved
        assert_eq!(heading_to_fragment("---both---"), "both---"); // Leading removed, trailing preserved
        assert_eq!(heading_to_fragment("----both----"), "both----"); // Leading removed, trailing preserved
    }

    #[test]
    fn test_kramdown_numbers() {
        assert_eq!(heading_to_fragment("Step 1: Getting Started"), "step-1-getting-started");
        assert_eq!(heading_to_fragment("Version 2.1.0"), "version-210");
        assert_eq!(heading_to_fragment("123 Numbers"), "numbers"); // Leading numbers trimmed
    }

    #[test]
    fn test_kramdown_comprehensive_verified() {
        // Test cases verified against official kramdown 2.5.1 Ruby gem
        let test_cases = [
            ("cbrown --> sbrown: --unsafe-paths", "cbrown----sbrown---unsafe-paths"),
            ("Update login_type", "update-logintype"),
            ("API::Response > Error--Handling", "apiresponse--error--handling"),
            ("Test---with---multiple---hyphens", "test---with---multiple---hyphens"),
            ("respect_gitignore", "respectgitignore"),
            ("Simple test case", "simple-test-case"),
            ("Testing & Coverage", "testing--coverage"),
            ("test_with_underscores", "testwithunderscores"),
        ];

        for (input, expected) in test_cases {
            let actual = heading_to_fragment(input);
            assert_eq!(
                actual, expected,
                "Kramdown verified test failed for input: '{input}'\nExpected: '{expected}'\nActual: '{actual}'"
            );
        }

        // Additional verified test cases for arrow fixes
        let arrow_test_cases = [
            ("test-->more", "test--more"),     // CRITICAL FIX: "-->" becomes "--"
            ("test->more", "test-more"),       // Single arrow "->" becomes "-"
            ("test > more", "test--more"),     // Spaced greater-than becomes --
            ("test -> more", "test---more"),   // Spaced arrow becomes ---
            ("test --> more", "test----more"), // Spaced long arrow becomes ----
        ];

        for (input, expected) in arrow_test_cases {
            let actual = heading_to_fragment(input);
            assert_eq!(
                actual, expected,
                "Arrow processing test failed for input: '{input}'\nExpected: '{expected}'\nActual: '{actual}'"
            );
        }
    }

    #[test]
    fn test_kramdown_edge_cases() {
        assert_eq!(heading_to_fragment("123"), "section"); // Numbers only
        assert_eq!(heading_to_fragment("!!!"), "section"); // Punctuation only
        assert_eq!(heading_to_fragment("   "), "section"); // Whitespace only
        assert_eq!(heading_to_fragment("a"), "a"); // Single letter
        assert_eq!(heading_to_fragment("1a"), "a"); // Number then letter
    }

    #[test]
    fn test_kramdown_unicode_security() {
        // Unicode normalization and ASCII filtering
        assert_eq!(heading_to_fragment("café"), "caf"); // é filtered out in ASCII-only step (kramdown behavior)
        assert_eq!(heading_to_fragment("cafe\u{0301}"), "caf"); // Normalized to café, then é filtered out

        // Zero-width character removal
        assert_eq!(heading_to_fragment("word\u{200B}break"), "wordbreak");
        assert_eq!(heading_to_fragment("test\u{200C}ing"), "testing");

        // Control character filtering
        assert_eq!(heading_to_fragment("test\u{0000}null"), "testnull");
        assert_eq!(heading_to_fragment("test\u{001B}escape"), "testescape");

        // RTL override filtering
        assert_eq!(heading_to_fragment("safe\u{202E}attack"), "safeattack");

        // Private use area filtering
        assert_eq!(heading_to_fragment("test\u{E000}private"), "testprivate");
    }

    #[test]
    fn test_kramdown_performance_protection() {
        // Large input handling
        let large_input = "a".repeat(20000);
        let result = heading_to_fragment(&large_input);
        assert!(!result.is_empty());
        assert!(result.len() < large_input.len()); // Should be truncated/processed

        // Consecutive character bomb protection
        let bomb = format!("test{}end", "a".repeat(1000));
        let result = heading_to_fragment(&bomb);
        // Should limit consecutive chars to 50, so result has max 50 'a's in a row
        assert!(result.starts_with("test"));
        assert!(result.ends_with("end"));

        // Verify no more than 50 consecutive identical characters
        let mut consecutive_count = 1;
        let mut last_char = None;
        let mut max_consecutive = 0;

        for c in result.chars() {
            if last_char == Some(c) {
                consecutive_count += 1;
            } else {
                max_consecutive = max_consecutive.max(consecutive_count);
                consecutive_count = 1;
            }
            last_char = Some(c);
        }
        max_consecutive = max_consecutive.max(consecutive_count);

        assert!(max_consecutive <= 50, "Too many consecutive chars: {max_consecutive}");

        // Memory allocation stress
        let mixed_stress = "word🎉-中文".repeat(100);
        let result = heading_to_fragment(&mixed_stress);
        assert!(!result.is_empty());
        // Should complete without panic or excessive memory use
    }

    #[test]
    fn test_kramdown_arrow_symbol_replacement_order() {
        // Test that symbol replacement order doesn't cause conflicts
        assert_eq!(heading_to_fragment("test --> more > info"), "test----more--info");
        assert_eq!(heading_to_fragment("cmd -> output & result"), "cmd---output--result");
        assert_eq!(heading_to_fragment("a > b --> c & d"), "a--b----c--d");

        // Mixed spaced and unspaced patterns
        assert_eq!(heading_to_fragment("a->b --> c>d"), "a-b----cd");
    }
}

#[cfg(test)]
mod security_tests {
    use super::*;

    #[test]
    fn test_input_size_limits() {
        // Test that extremely large inputs are handled safely
        let huge_input = "a".repeat(100_000); // 100KB
        let result = heading_to_fragment(&huge_input);

        // Should not panic and should return reasonable result
        assert!(!result.is_empty());
        assert!(result.len() < huge_input.len());
    }

    #[test]
    fn test_unicode_normalization() {
        // Test NFC normalization
        let composed = "é"; // U+00E9 (composed)
        let decomposed = "e\u{0301}"; // e + U+0301 (decomposed)

        let result1 = heading_to_fragment(composed);
        let result2 = heading_to_fragment(decomposed);

        // Both should be processed identically after normalization
        assert_eq!(result1, result2);
    }

    #[test]
    fn test_dangerous_unicode_filtering() {
        // Test that various dangerous Unicode categories are filtered
        let dangerous_cases = vec![
            ("test\u{202E}attack", "RTL override"),
            ("safe\u{200B}split", "Zero-width space"),
            ("ctrl\u{0001}char", "Control character"),
            ("private\u{E000}use", "Private use area"),
            ("nonchar\u{FFFE}test", "Non-character"),
        ];

        for (input, description) in dangerous_cases {
            let result = heading_to_fragment(input);
            // Should not panic and should filter dangerous chars
            assert!(!result.is_empty(), "Failed to handle: {description}");
            // Dangerous characters should be removed
            assert!(!result.contains('\u{202E}'), "RTL override not filtered");
            assert!(!result.contains('\u{200B}'), "Zero-width space not filtered");
        }
    }

    #[test]
    fn test_consecutive_character_limits() {
        // Test protection against character bombs
        let bomb_cases = vec![
            (format!("start{}end", "a".repeat(200)), "a-bomb"),
            (format!("begin{}-finish", "-".repeat(100)), "hyphen-bomb"),
            (format!("test{}more", " ".repeat(150)), "space-bomb"),
        ];

        for (input, description) in bomb_cases {
            let result = heading_to_fragment(&input);

            // Should complete in reasonable time and not contain excessive repeats
            assert!(!result.is_empty(), "Failed to handle: {description}");

            // Check that no single character repeats more than 50 times
            let mut consecutive_count = 1;
            let mut last_char = None;
            let mut max_consecutive = 0;

            for c in result.chars() {
                if last_char == Some(c) {
                    consecutive_count += 1;
                } else {
                    max_consecutive = max_consecutive.max(consecutive_count);
                    consecutive_count = 1;
                }
                last_char = Some(c);
            }
            max_consecutive = max_consecutive.max(consecutive_count);

            assert!(
                max_consecutive <= 50,
                "Consecutive character limit exceeded for {description}: {max_consecutive} consecutive chars"
            );
        }
    }

    #[test]
    fn test_performance_bounds() {
        use std::time::Instant;

        // Test that even pathological inputs complete quickly
        let pathological_cases = vec![
            "a".repeat(10_000),
            "-".repeat(5_000),
            "🎉".repeat(1_000),
            "test".repeat(2_500),
            format!(
                "{} -> {} --> {}",
                "word".repeat(1000),
                "more".repeat(1000),
                "text".repeat(1000)
            ),
        ];

        for input in pathological_cases {
            let start = Instant::now();
            let result = heading_to_fragment(&input);
            let duration = start.elapsed();

            // Should complete within reasonable time (1 second)
            assert!(
                duration < std::time::Duration::from_secs(1),
                "Performance test failed: took {:?} for input of length {}",
                duration,
                input.len()
            );

            // Should produce valid result
            assert!(!result.is_empty() || result == "section");
        }
    }
}