penmanship 0.1.0

A Unicode character lookup library for converting text patterns to Unicode characters
Documentation
//! HTML named character reference mappings.
//!
//! Supports all HTML named character references from the WHATWG HTML Living Standard.
//! Only the `&entity;` format is supported (with ampersand and semicolon).
//!
//! Source: <https://html.spec.whatwg.org/entities.json>

pub(crate) mod part1;
pub(crate) mod part2;
pub(crate) mod part3;

// Re-export parts for documentation generation
pub use part1::PART1;
pub use part2::PART2;
pub use part3::PART3;

/// Resolves an HTML named character reference to its replacement text.
///
/// The pattern is used verbatim as the lookup key and is tried against the
/// `part1`, `part2` and `part3` tables in that order; the first table that
/// contains the key supplies the result.
///
/// # Returns
///
/// `Some((character, description))` when one of the tables contains the
/// pattern, otherwise `None`.
///
/// # Examples
///
/// ```
/// use penmanship::categories::html::lookup_html;
///
/// assert_eq!(lookup_html("&nbsp;"), Some(("\u{00A0}", "html named character reference")));
/// assert_eq!(lookup_html("&copy;"), Some(("\u{00A9}", "html named character reference")));
/// assert_eq!(lookup_html("&alpha;"), Some(("\u{03B1}", "html named character reference")));
/// ```
pub fn lookup_html<S: AsRef<str>>(pattern: S) -> Option<(&'static str, &'static str)> {
    let key = pattern.as_ref();

    // Probe each table in turn and stop at the first hit, so later tables
    // are only consulted when the earlier ones miss.
    if let Some(&hit) = part1::PART1.get(key) {
        return Some(hit);
    }
    if let Some(&hit) = part2::PART2.get(key) {
        return Some(hit);
    }
    part3::PART3.get(key).copied()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Description expected alongside each entity exercised by these tests.
    const DESCRIPTION: &str = "html named character reference";

    /// Asserts that `entity` resolves to `expected` paired with [`DESCRIPTION`].
    #[track_caller]
    fn assert_resolves(entity: &str, expected: &'static str) {
        assert_eq!(lookup_html(entity), Some((expected, DESCRIPTION)));
    }

    /// Asserts that `pattern` is not recognized by the lookup.
    #[track_caller]
    fn assert_unrecognized(pattern: &str) {
        assert_eq!(lookup_html(pattern), None);
    }

    /// Well-formed `&name;` references resolve to their code points.
    #[test]
    fn test_html_entities() {
        assert_resolves("&nbsp;", "\u{00A0}");
        assert_resolves("&copy;", "\u{00A9}");
        assert_resolves("&alpha;", "\u{03B1}");
    }

    /// Dropping the leading ampersand makes the pattern unrecognized.
    #[test]
    fn test_no_ampersand_not_supported() {
        assert_unrecognized("nbsp;");
        assert_unrecognized("copy;");
        assert_unrecognized("alpha;");
    }

    /// Dropping the trailing semicolon makes the pattern unrecognized.
    #[test]
    fn test_improperly_formatted_entities() {
        assert_unrecognized("&nbsp");
        assert_unrecognized("&copy");
    }

    /// The markup-significant characters resolve through their entities.
    #[test]
    fn test_common_entities() {
        assert_resolves("&lt;", "\u{003C}"); // <
        assert_resolves("&gt;", "\u{003E}"); // >
        assert_resolves("&amp;", "\u{0026}"); // &
        assert_resolves("&quot;", "\u{0022}"); // "
    }
}