// penmanship 0.1.0

// A Unicode character lookup library for converting text patterns to Unicode characters.
// Documentation
//! Core lookup functionality for Unicode character patterns.

use crate::categories;

/// Resolve a text pattern to its Unicode character.
///
/// Every enabled category is consulted in a fixed order: punctuation, math,
/// Greek, fractions, currency, symbols, superscripts, subscripts, HTML named
/// entities, and finally emoji shortcodes. The first category that knows the
/// pattern wins. A category whose Cargo feature is disabled is compiled out
/// and therefore never consulted.
///
/// Returns `Some((character, description))` when a category recognizes the
/// pattern, or `None` when none of the enabled categories does.
///
/// # Examples
///
/// ```
/// use penmanship::lookup;
///
/// // A pattern that no category defines yields `None`.
/// assert_eq!(lookup("unknown"), None);
/// ```
pub fn lookup<S: AsRef<str>>(pattern: S) -> Option<(&'static str, &'static str)> {
    let key = pattern.as_ref();

    // Start with "nothing found" and let each enabled category fill the gap.
    // `or_else` is lazy, so a later category is only queried while every
    // earlier one has missed, which keeps the precedence order intact.
    let found: Option<(&'static str, &'static str)> = None;

    #[cfg(feature = "punctuation")]
    let found = found.or_else(|| categories::punctuation::PUNCTUATION.get(key).copied());

    #[cfg(feature = "math")]
    let found = found.or_else(|| categories::math::MATH.get(key).copied());

    #[cfg(feature = "greek")]
    let found = found.or_else(|| categories::greek::GREEK.get(key).copied());

    #[cfg(feature = "fractions")]
    let found = found.or_else(|| categories::fractions::FRACTIONS.get(key).copied());

    #[cfg(feature = "currency")]
    let found = found.or_else(|| categories::currency::CURRENCY.get(key).copied());

    #[cfg(feature = "symbols")]
    let found = found.or_else(|| categories::symbols::SYMBOLS.get(key).copied());

    #[cfg(feature = "superscripts")]
    let found = found.or_else(|| categories::superscripts::SUPERSCRIPTS.get(key).copied());

    #[cfg(feature = "subscripts")]
    let found = found.or_else(|| categories::subscripts::SUBSCRIPTS.get(key).copied());

    // HTML named entities go through a lookup function rather than a single map.
    #[cfg(feature = "html")]
    let found = found.or_else(|| categories::html::lookup_html(key));

    // Emoji shortcodes are tried last.
    #[cfg(feature = "emoji")]
    let found = found.or_else(|| categories::emoji::lookup_emoji(key));

    found
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A pattern that no category defines must not resolve to anything.
    #[test]
    fn test_unknown_pattern() {
        let outcome = lookup("unknown");
        assert_eq!(outcome, None);
    }

    /// Punctuation patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "punctuation")]
    fn test_punctuation_lookup() {
        let cases = [
            ("...", ("\u{2026}", "horizontal ellipsis")),
            ("em", ("\u{2014}", "em dash")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Math patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "math")]
    fn test_math_lookup() {
        let cases = [
            ("->", ("\u{2192}", "rightwards arrow")),
            ("infinity", ("\u{221E}", "infinity")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Greek letter patterns are case-sensitive: lowercase and capitalized
    /// names map to the small and capital letters respectively.
    #[test]
    #[cfg(feature = "greek")]
    fn test_greek_lookup() {
        let cases = [
            ("alpha", ("\u{03B1}", "greek small letter alpha")),
            ("Alpha", ("\u{0391}", "greek capital letter alpha")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Fraction patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "fractions")]
    fn test_fractions_lookup() {
        let cases = [
            ("1/2", ("\u{00BD}", "fraction one half")),
            ("3/4", ("\u{00BE}", "fraction three quarters")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Currency patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "currency")]
    fn test_currency_lookup() {
        let cases = [
            ("euro", ("\u{20AC}", "euro sign")),
            ("pound", ("\u{00A3}", "pound sign")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Symbol patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "symbols")]
    fn test_symbols_lookup() {
        let cases = [
            ("(c)", ("\u{00A9}", "copyright sign")),
            ("star", ("\u{2605}", "black star")),
        ];
        for (pattern, expected) in cases {
            assert_eq!(lookup(pattern), Some(expected));
        }
    }

    /// Superscript patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "superscripts")]
    fn test_superscripts_lookup() {
        let expected = ("\u{00B2}", "superscript two");
        assert_eq!(lookup("^2"), Some(expected));
    }

    /// Subscript patterns resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "subscripts")]
    fn test_subscripts_lookup() {
        let expected = ("\u{2082}", "subscript two");
        assert_eq!(lookup("_2"), Some(expected));
    }

    /// HTML named entities resolve through the public `lookup` entry point.
    /// All of the entities checked here share one generic description.
    #[test]
    #[cfg(feature = "html")]
    fn test_html_lookup() {
        let description = "html named character reference";
        let cases = [
            ("&nbsp;", "\u{00A0}"),
            ("&copy;", "\u{00A9}"),
            ("&alpha;", "\u{03B1}"),
        ];
        for (entity, character) in cases {
            assert_eq!(lookup(entity), Some((character, description)));
        }
    }

    /// Emoji shortcodes resolve through the public `lookup` entry point.
    #[test]
    #[cfg(feature = "emoji")]
    fn test_emoji_lookup() {
        let cases = [
            (":smile:", ("😄", "grinning face with smiling eyes")),
            (":heart:", ("❤️", "red heart")),
            (":thumbsup:", ("👍", "thumbs up")),
        ];
        for (shortcode, expected) in cases {
            assert_eq!(lookup(shortcode), Some(expected));
        }
    }

    /// Pin down how the `x` pattern resolves when math and symbols are both enabled.
    #[test]
    #[cfg(all(feature = "math", feature = "symbols"))]
    fn test_x_pattern_collision() {
        // `lookup` consults math before symbols, so "x" must come back as the
        // multiplication sign.
        let expected = ("×", "multiplication sign");
        assert_eq!(lookup("x"), Some(expected));
        // NOTE(review): a ballot x (✗) entry under this same pattern in symbols
        // would be unreachable through `lookup`; whether symbols still defines
        // one is not visible from this file.
    }

    /// Guard against one pattern being defined by more than one category.
    ///
    /// The keys of every enabled phf map are merged into a single table that
    /// remembers which categories contributed each key. A key with more than
    /// one contributor would leave the later mapping unreachable, because
    /// `lookup` returns the first match in its fixed category order.
    #[test]
    fn test_no_duplicate_patterns() {
        extern crate std;
        use std::collections::BTreeMap;
        use std::vec::Vec;

        // pattern -> names of every category that defines it
        let mut sources_by_pattern: BTreeMap<&str, Vec<&str>> = BTreeMap::new();

        #[cfg(feature = "punctuation")]
        for pattern in categories::punctuation::PUNCTUATION.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("punctuation");
        }

        #[cfg(feature = "math")]
        for pattern in categories::math::MATH.keys() {
            sources_by_pattern.entry(pattern).or_default().push("math");
        }

        #[cfg(feature = "greek")]
        for pattern in categories::greek::GREEK.keys() {
            sources_by_pattern.entry(pattern).or_default().push("greek");
        }

        #[cfg(feature = "fractions")]
        for pattern in categories::fractions::FRACTIONS.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("fractions");
        }

        #[cfg(feature = "currency")]
        for pattern in categories::currency::CURRENCY.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("currency");
        }

        #[cfg(feature = "symbols")]
        for pattern in categories::symbols::SYMBOLS.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("symbols");
        }

        #[cfg(feature = "superscripts")]
        for pattern in categories::superscripts::SUPERSCRIPTS.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("superscripts");
        }

        #[cfg(feature = "subscripts")]
        for pattern in categories::subscripts::SUBSCRIPTS.keys() {
            sources_by_pattern
                .entry(pattern)
                .or_default()
                .push("subscripts");
        }

        // The HTML entities are spread over three maps; each is tagged separately
        // so a report shows exactly which part a colliding key came from.
        #[cfg(feature = "html")]
        {
            for pattern in categories::html::part1::PART1.keys() {
                sources_by_pattern
                    .entry(pattern)
                    .or_default()
                    .push("html:part1");
            }
            for pattern in categories::html::part2::PART2.keys() {
                sources_by_pattern
                    .entry(pattern)
                    .or_default()
                    .push("html:part2");
            }
            for pattern in categories::html::part3::PART3.keys() {
                sources_by_pattern
                    .entry(pattern)
                    .or_default()
                    .push("html:part3");
            }
        }

        // Emoji is deliberately left out: it uses an external crate, and its
        // patterns are wrapped in :colons:, so they are unlikely to collide
        // with the other categories.

        // Keep only the patterns that more than one category contributed.
        let collisions: Vec<_> = sources_by_pattern
            .iter()
            .filter(|(_, sources)| sources.len() > 1)
            .collect();

        if collisions.is_empty() {
            return;
        }

        // List every offender on stderr before failing; `std` is reachable here
        // through the `extern crate std` declared at the top of this test.
        std::eprintln!("Found duplicate patterns across categories:");
        for (pattern, cats) in &collisions {
            std::eprintln!("  Pattern '{pattern}' in: {cats:?}");
        }
        panic!(
            "Found {} duplicate pattern(s) across categories",
            collisions.len()
        );
    }
}