penmanship 0.1.0

A Unicode character lookup library for converting text patterns to Unicode characters
Documentation
//! Generate comprehensive mapping documentation
//!
//! This example reads the Unicode character mappings from the library
//! and generates formatted Markdown tables of the patterns, one table per
//! category. HTML entities and emoji are represented by a short sample only.
//!
//! # Usage
//!
//! ```bash
//! cargo run --example generate_mapping_docs --features=full --release
//! ```
//!
//! Generates `docs/mappings.md` with all character mappings.

use penmanship::categories;
use std::collections::BTreeMap;
use std::fs;
use std::io::Write;

/// One table row: `(pattern, character, description)`.
type Row = (String, String, String);

/// Orders rows by character first, then by pattern, so that aliases of the
/// same character end up on adjacent lines.
fn sort_rows(rows: &mut [Row]) {
    rows.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
}

/// Converts the `(pattern, (character, description))` entries of one mapping
/// table into owned rows, sorted with [`sort_rows`].
fn sorted_rows<'a, K, C, D>(entries: impl IntoIterator<Item = (&'a K, &'a (C, D))>) -> Vec<Row>
where
    K: ToString + 'a,
    C: ToString + 'a,
    D: ToString + 'a,
{
    let mut rows: Vec<Row> = entries
        .into_iter()
        .map(|(pattern, (character, description))| {
            (
                pattern.to_string(),
                character.to_string(),
                description.to_string(),
            )
        })
        .collect();
    sort_rows(&mut rows);
    rows
}

/// Escapes `|` so the text cannot terminate a Markdown table cell.
fn escape_cell(text: &str) -> String {
    text.replace('|', "\\|")
}

/// Wraps `text` in a Markdown code span.
///
/// The fence is one backtick longer than the longest backtick run inside
/// `text`, and the content is padded with a space when it starts or ends with
/// a backtick, so a pattern containing backticks cannot close the span early.
/// Text without backticks is simply wrapped in single backticks.
fn code_span(text: &str) -> String {
    let longest_run = text
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    let fence = "`".repeat(longest_run + 1);
    let pad = if text.starts_with('`') || text.ends_with('`') {
        " "
    } else {
        ""
    };
    format!("{fence}{pad}{text}{pad}{fence}")
}

/// Gathers the rows of every documented category, keyed by category name.
///
/// A `BTreeMap` is used so the categories are later emitted in alphabetical
/// order of their names.
fn collect_categories() -> BTreeMap<&'static str, Vec<Row>> {
    let mut categories_data: BTreeMap<&'static str, Vec<Row>> = BTreeMap::new();

    categories_data.insert(
        "Punctuation",
        sorted_rows(categories::punctuation::PUNCTUATION.entries()),
    );
    categories_data.insert("Math", sorted_rows(categories::math::MATH.entries()));
    categories_data.insert("Greek", sorted_rows(categories::greek::GREEK.entries()));
    categories_data.insert(
        "Fractions",
        sorted_rows(categories::fractions::FRACTIONS.entries()),
    );
    categories_data.insert(
        "Currency",
        sorted_rows(categories::currency::CURRENCY.entries()),
    );
    categories_data.insert(
        "Symbols",
        sorted_rows(categories::symbols::SYMBOLS.entries()),
    );
    categories_data.insert(
        "Superscripts",
        sorted_rows(categories::superscripts::SUPERSCRIPTS.entries()),
    );
    categories_data.insert(
        "Subscripts",
        sorted_rows(categories::subscripts::SUBSCRIPTS.entries()),
    );

    // HTML (sample - too many to list all): the first ten entries of PART1
    // whose pattern name does not mark an invisible/zero-width character,
    // listed with a shortened description.
    let mut html_sample: Vec<Row> = categories::html::PART1
        .entries()
        .filter(|(pattern, _)| !(pattern.contains("Invisible") || pattern.contains("ZeroWidth")))
        .take(10)
        .map(|(pattern, (character, _description))| {
            (
                pattern.to_string(),
                character.to_string(),
                "html entity".to_string(),
            )
        })
        .collect();
    sort_rows(&mut html_sample);
    categories_data.insert("HTML (sample)", html_sample);

    // Emoji (sample): a fixed hand-picked list, kept in this order.
    categories_data.insert(
        "Emoji (sample)",
        vec![
            (
                ":smile:".to_string(),
                "😄".to_string(),
                "grinning face with smiling eyes".to_string(),
            ),
            (
                ":heart:".to_string(),
                "❤️".to_string(),
                "red heart".to_string(),
            ),
            (
                ":thumbsup:".to_string(),
                "👍".to_string(),
                "thumbs up".to_string(),
            ),
        ],
    );

    categories_data
}

/// Generate the mapping documentation as a string
///
/// The returned Markdown consists of a title and intro paragraph, one
/// `## <category>` section per category (alphabetical by category name), each
/// holding a `Pattern | Character | Description` table, and a footer line.
/// The two sample categories additionally get a one-line note stating how
/// many entries are supported in total.
///
/// Every cell has `|` escaped so cell content cannot break the table, and
/// patterns are rendered as code spans that tolerate embedded backticks.
fn generate_docs() -> String {
    let mut output = String::new();

    // Header
    output.push_str("# Character Mappings\n\n");
    output.push_str(
        "This document lists all Unicode character patterns supported by penmanship.\n\n",
    );

    // One section per category
    for (category_name, patterns) in collect_categories() {
        output.push_str(&format!("## {category_name}\n\n"));

        // Sample categories only show a subset; say how large the full set is.
        match category_name {
            "HTML (sample)" => {
                output.push_str("2200+ total HTML named character references are supported.\n\n");
            }
            "Emoji (sample)" => {
                output.push_str("1800+ emoji shortcodes via emojis crate are supported.\n\n");
            }
            _ => {}
        }

        output.push_str("| Pattern | Character | Description |\n");
        output.push_str("|---------|-----------|-------------|\n");

        for (pattern, character, description) in patterns {
            // Pipes must be escaped even inside a code span within a table.
            let pattern_cell = code_span(&escape_cell(&pattern));
            let character_cell = escape_cell(&character);
            let description_cell = escape_cell(&description);
            output.push_str(&format!(
                "| {pattern_cell} | {character_cell} | {description_cell} |\n"
            ));
        }

        output.push('\n');
    }

    // Footer
    // NOTE(review): the module docs describe this file as a cargo example,
    // while the footer names a `scripts/` path - confirm which is correct.
    output.push_str("---\n\n");
    output.push_str("*Generated by `scripts/generate_mapping_docs.rs`*\n");

    output
}

/// Renders the mapping documentation and writes it to `docs/mappings.md`,
/// creating the `docs` directory first when it is missing.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directory, creating the
/// file, or writing its contents.
fn main() -> std::io::Result<()> {
    println!("Generating mapping documentation...");

    let markdown = generate_docs();

    // Make sure the target directory is there before creating the file.
    fs::create_dir_all("docs")?;
    fs::File::create("docs/mappings.md")?.write_all(markdown.as_bytes())?;

    println!("✓ Generated docs/mappings.md");
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test for the in-memory generation path; nothing is written to disk.
    #[test]
    fn test_generate_docs() {
        let markdown = generate_docs();

        // The title and a few of the category sections must be present.
        for heading in [
            "# Character Mappings",
            "## Punctuation",
            "## Math",
            "## Greek",
        ] {
            assert!(
                markdown.contains(heading),
                "generated docs are missing `{heading}`"
            );
        }

        // Should be a substantial document
        assert!(markdown.len() > 1000);
    }
}