i-dunno 0.6.0

RFC 8771 Internationalized Deliberately Unreadable Network Notation
Documentation
use unic_emoji_char::is_emoji;
use unic_ucd::category::GeneralCategory;
use unic_ucd_bidi::BidiClass;
use unicode_script::Script;

use crate::data::{CONFUSABLES_CHARS, CONFUSABLES_STRS};

/// Return true if `input` contains anything listed in the confusables tables.
///
/// Both the single-character table (`CONFUSABLES_CHARS`) and the
/// multi-character table (`CONFUSABLES_STRS`) are consulted; a hit in either
/// one is enough.
pub fn has_confusables(input: &str) -> bool {
    // Single characters first, stopping at the first hit.
    for &confusable in CONFUSABLES_CHARS.iter() {
        if input.contains(confusable) {
            return true;
        }
    }

    // Then the multi-character sequences.
    for &sequence in CONFUSABLES_STRS.iter() {
        if input.contains(sequence) {
            return true;
        }
    }

    false
}

/// Return true if at least one character of `input` satisfies `is_emoji`.
///
/// An empty string never contains an emoji.
///
/// NOTE(review): if `is_emoji` follows the Unicode `Emoji` property, ASCII
/// digits, `#` and `*` also count as emoji - confirm that is acceptable for
/// callers.
pub fn has_emoji(input: &str) -> bool {
    // A `fn(char) -> bool` is a valid `str` pattern, so `contains` performs
    // the per-character search for us.
    input.contains(is_emoji)
}

/// Return the bidirectional class (`BidiClass`) of a character.
///
/// This is a thin convenience wrapper so callers do not need to depend on
/// the bidi crate directly.
pub fn directionality(input: char) -> BidiClass {
    BidiClass::of(input)
}

/// Return the unicode Script for a character
pub fn script(input: char) -> Script {
    Script::from(input)
}

/// Report whether the supplied char falls in one of the Symbol general
/// categories.
pub fn symbol(input: char) -> bool {
    let category = GeneralCategory::of(input);
    category.is_symbol()
}

/// Report whether the supplied char is non-printable.
///
/// There is no single authoritative definition of "printable", so this
/// follows the one given in Python's documentation:
///     "Nonprintable characters are those characters defined in the
///     Unicode character database as Other or Separator, excepting the
///     ASCII space."
pub fn unprintable(input: char) -> bool {
    match input {
        // The ASCII space is a Separator, but is explicitly exempted.
        ' ' => false,
        other => {
            let general_category = GeneralCategory::of(other);
            general_category.is_separator() || general_category.is_other()
        }
    }
}

/// Unit tests for the character-classification helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn bell_is_unprintable() {
        // U+0007 BELL (a control character)
        assert!(unprintable('\u{0007}'));
    }

    #[test]
    fn a_is_printable() {
        assert!(!unprintable('a'));
    }

    #[test]
    fn modifier_symbols_are_symbols() {
        // U+02FC MODIFIER LETTER END LOW TONE
        assert!(symbol('\u{02fc}'));
    }

    #[test]
    fn some_weird_characters_are_not_symbols() {
        // Control characters: LINE FEED, NULL and END OF MEDIUM.
        assert!(!symbol('\u{000a}'));
        assert!(!symbol('\u{0000}'));
        assert!(!symbol('\u{0019}'));
    }

    #[test]
    fn many_things_are_common() {
        assert_eq!(script('\u{02fc}'), Script::Common);
        assert_eq!(script('\u{000a}'), Script::Common);
        assert_eq!(script('\u{0000}'), Script::Common);
        assert_eq!(script('\u{0019}'), Script::Common);
    }

    #[test]
    fn a_is_ltr() {
        assert_eq!(directionality('a'), BidiClass::LeftToRight);
    }

    #[test]
    fn ka_is_rtl() {
        // U+07DE NKO LETTER KA
        assert_eq!(directionality('\u{07de}'), BidiClass::RightToLeft);
    }

    // Previously named `two_extreme_chars_have_different_dirs`, which
    // contradicted the assertions: both characters are expected to be
    // left-to-right.
    #[test]
    fn two_extreme_chars_are_both_ltr() {
        // U+04C0 CYRILLIC LETTER PALOCHKA
        assert_eq!(directionality('\u{04c0}'), BidiClass::LeftToRight);
        // A code point far out in the supplementary planes.
        assert_eq!(directionality('\u{CED6E}'), BidiClass::LeftToRight);
    }

    #[test]
    fn upside_down_question_mark_is_not_confusable() {
        // U+00BF INVERTED QUESTION MARK
        assert!(!has_confusables("\u{00bf}"));
    }

    #[test]
    fn example_is_not_confusable() {
        assert!(!has_confusables("\u{000B}\u{06ab}\u{0004}\u{0024}"));
    }

    #[test]
    fn maths_r_is_confusable() {
        // U+211D DOUBLE-STRUCK CAPITAL R
        assert!(has_confusables("\u{211d}"));
    }

    #[test]
    fn double_quotes_are_confusable() {
        // Two apostrophes in a row can pass for a double quote, and the
        // double quote itself is listed too.
        assert!(has_confusables("''"));
        assert!(has_confusables("\""));
    }

    #[test]
    fn some_cyrillic_is_confusable() {
        // U+04C0 CYRILLIC LETTER PALOCHKA
        assert!(has_confusables("\u{04c0}"));
    }

    #[test]
    fn latin_letters_are_not_emoji() {
        assert!(!has_emoji(""));
        assert!(!has_emoji("aaa "));
        assert!(!has_emoji("bc"));
    }

    #[test]
    fn pile_of_poo_is_an_emoji() {
        // U+1F4A9 PILE OF POO, alone and embedded in other text.
        assert!(has_emoji("\u{1F4A9}"));
        assert!(has_emoji("aaa \u{1F4A9}"));
        assert!(has_emoji("b\u{1F4A9}c"));
    }
}