use crate::data;
use crate::i_dunno_unicode::{
directionality, has_confusables, has_emoji, script, symbol, unprintable,
};
/// How confusing a string is, as reported by [`confusion_level`].
///
/// The variants are declared from least to most confusing, so the derived
/// ordering satisfies `Minimum < Satisfactory < Delightful`. The type is
/// totally ordered and hashable, which lets callers sort levels, take a
/// `max`/`min`, or use them as set and map keys.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ConfusionLevel {
    /// The minimum requirements hold, but the satisfactory ones do not.
    Minimum,
    /// The minimum and satisfactory requirements hold, but the delightful
    /// ones do not.
    Satisfactory,
    /// The minimum, satisfactory and delightful requirements all hold.
    Delightful,
}
/// Classifies how confusing `input` is.
///
/// The three tiers are checked from lowest to highest and the check stops
/// at the first tier that fails:
///
/// * `None` when the minimum requirements are not met,
/// * `Some(ConfusionLevel::Minimum)` when only the minimum ones are met,
/// * `Some(ConfusionLevel::Satisfactory)` when the satisfactory ones are
///   met as well,
/// * `Some(ConfusionLevel::Delightful)` when all three tiers are met.
pub fn confusion_level(input: &str) -> Option<ConfusionLevel> {
    if !is_minimum(input) {
        return None;
    }
    if !is_satisfactory(input) {
        return Some(ConfusionLevel::Minimum);
    }

    let level = if is_delightful(input) {
        ConfusionLevel::Delightful
    } else {
        ConfusionLevel::Satisfactory
    };
    Some(level)
}
/// Minimum tier: `input` must contain a code point that takes more than one
/// UTF-8 byte *and* a code point rejected by the IDNA2008 check.
fn is_minimum(input: &str) -> bool {
    if !has_multi_octet_code_point(input) {
        return false;
    }
    has_idna2008_disallowed_code_point(input)
}
/// Satisfactory tier: at least two of the following three must hold for
/// `input`:
///
/// * it contains an unprintable code point,
/// * it mixes scripts,
/// * it contains a symbol.
///
/// The symbol check is only consulted when exactly one of the first two
/// holds, because otherwise the outcome is already decided.
fn is_satisfactory(input: &str) -> bool {
    let unprintable_found = has_unprintable_code_point(input);
    let scripts_mixed = has_multiple_scripts(input);

    match (unprintable_found, scripts_mixed) {
        // Two hits already: nothing more to check.
        (true, true) => true,
        // No hits: a symbol alone can contribute at most one.
        (false, false) => false,
        // Exactly one hit: a symbol supplies the second.
        _ => has_symbol(input),
    }
}
/// Delightful tier: at least two of the following three must hold for
/// `input`:
///
/// * it mixes directionalities,
/// * it contains an emoji,
/// * it contains confusables.
///
/// The confusables check is only consulted when exactly one of the first
/// two holds, because otherwise the outcome is already decided.
fn is_delightful(input: &str) -> bool {
    let directions_mixed = has_different_directionalities(input);
    let emoji_found = has_emoji(input);

    if directions_mixed == emoji_found {
        // Both hold (two hits, done) or neither holds (at most one hit
        // possible); either way the shared value is the answer.
        directions_mixed
    } else {
        // Exactly one hit so far: confusables act as the tie-breaker.
        has_confusables(input)
    }
}
/// Returns `true` when `input` contains a code point whose UTF-8 encoding
/// is longer than one byte.
fn has_multi_octet_code_point(input: &str) -> bool {
    // Only ASCII code points encode to a single UTF-8 byte, so any
    // non-ASCII content means a multi-octet code point is present.
    !input.is_ascii()
}
/// Returns `true` when `data::idna_disallowed` flags at least one code point
/// of `input`.
fn has_idna2008_disallowed_code_point(input: &str) -> bool {
    for code_point in input.chars() {
        if data::idna_disallowed(code_point) {
            return true;
        }
    }
    false
}
/// Returns `true` when `unprintable` flags at least one code point of
/// `input`.
fn has_unprintable_code_point(input: &str) -> bool {
    // A `char -> bool` predicate is a string pattern: `contains` reports
    // whether any character of `input` satisfies it.
    input.contains(unprintable)
}
fn has_multiple_scripts(input: &str) -> bool {
let mut scr = None;
for ch in input.chars() {
let new_scr = script(ch);
if let Some(scr) = scr {
if scr != new_scr {
return true;
}
}
scr = Some(new_scr);
}
return false;
}
/// Returns `true` when `symbol` flags at least one code point of `input`.
fn has_symbol(input: &str) -> bool {
    let mut verdicts = input.chars().map(symbol);
    // The iterator is lazy, so the scan stops at the first symbol.
    verdicts.any(|is_symbol| is_symbol)
}
fn has_different_directionalities(input: &str) -> bool {
let mut dir = None;
for ch in input.chars() {
let new_dir = directionality(ch);
if let Some(dir) = dir {
if dir != new_dir {
return true;
}
}
dir = Some(new_dir);
}
false
}
/// Unit tests for the individual detectors and for the overall
/// `confusion_level` classification.
#[cfg(test)]
mod tests {
    use super::*;
    use unicode_script::Script;

    #[test]
    fn confusion_levels_are_ordered() {
        assert!(ConfusionLevel::Satisfactory > ConfusionLevel::Minimum);
        assert!(ConfusionLevel::Delightful > ConfusionLevel::Minimum);
        assert!(ConfusionLevel::Delightful > ConfusionLevel::Satisfactory);
    }

    // --- has_multi_octet_code_point ---

    #[test]
    fn empty_string_does_not_contain_multi_octet_code_points() {
        assert!(!has_multi_octet_code_point(""));
    }

    #[test]
    fn ascii_string_does_not_contain_multi_octet_code_points() {
        assert!(!has_multi_octet_code_point("abc{}-+01"));
    }

    #[test]
    fn e_acute_does_contain_multi_octet_code_points() {
        assert!(has_multi_octet_code_point("\u{E9}"));
        assert!(has_multi_octet_code_point("abc{}-\u{E9}+01"));
    }

    // --- has_idna2008_disallowed_code_point ---

    #[test]
    fn zero_code_point_makes_string_idna_disallowed() {
        assert!(!has_idna2008_disallowed_code_point("foobar"));
        assert!(has_idna2008_disallowed_code_point("foo\u{00}bar"));
    }

    // --- has_unprintable_code_point ---

    #[test]
    fn bel_code_point_is_unprintable() {
        assert!(!has_unprintable_code_point("foobar"));
        assert!(has_unprintable_code_point("foo\u{07}bar"));
    }

    #[test]
    fn form_feed_is_unprintable() {
        assert!(!has_unprintable_code_point("012345"));
        assert!(has_unprintable_code_point("012\u{0C}345"));
    }

    // --- has_different_directionalities ---

    #[test]
    fn single_char_has_one_directionality() {
        assert!(!has_different_directionalities("a"));
    }

    #[test]
    fn many_chars_with_same_directionality_are_not_enough() {
        assert!(!has_different_directionalities("abcdef"));
        assert!(!has_different_directionalities("\u{07d2}\u{07de}\u{07cf}"));
    }

    #[test]
    fn chars_with_different_directionalities_are_detected() {
        assert!(has_different_directionalities("a\u{07d2}"));
    }

    // --- confusion_level ---

    #[test]
    fn empty_string_is_not_confusing() {
        assert_eq!(confusion_level(""), None);
    }

    #[test]
    fn normal_string_is_not_confusing() {
        assert_eq!(confusion_level("some-allowed-Chars-0"), None);
    }

    #[test]
    fn string_with_multibyte_and_disallowed_char_is_minimum() {
        assert_eq!(
            confusion_level("foo\u{037e}bar"),
            Some(ConfusionLevel::Minimum)
        );
    }

    #[test]
    fn string_with_multibyte_and_disallowed_char_is_delightful() {
        assert_eq!(
            confusion_level("foo\u{e0075}bar+"),
            Some(ConfusionLevel::Delightful)
        );
    }

    #[test]
    fn string_with_just_bel_is_not_confusing() {
        assert_eq!(confusion_level("foo\u{07}bar"), None);
    }

    #[test]
    fn string_with_bel_plus_min_reqs_is_delightful() {
        assert_eq!(
            confusion_level("foo\u{07}bar\u{e0075}\u{2208}"),
            Some(ConfusionLevel::Delightful)
        );
    }

    #[test]
    fn string_without_multiple_scripts_is_delightful() {
        assert_eq!(
            confusion_level("\u{0007}\u{0000}\u{1F4A9}\u{2208}"),
            Some(ConfusionLevel::Delightful)
        );
    }

    #[test]
    fn string_without_symbols_is_delightful() {
        assert_eq!(
            confusion_level("\u{0007}\u{0000}\u{00E9}a"),
            Some(ConfusionLevel::Delightful)
        );
    }

    #[test]
    fn string_without_symbols_or_multiple_scripts_is_not_satisfactory() {
        let s = "\u{0007}\u{0000}\u{0084}";
        // Guard the premise of this test: every code point is expected to
        // map to the same script.
        assert_eq!(
            scripts(s),
            vec![Script::Common, Script::Common, Script::Common]
        );
        assert_eq!(confusion_level(s), Some(ConfusionLevel::Minimum));
    }

    #[test]
    fn string_with_symbols_etc_is_satisfactory() {
        assert_eq!(
            confusion_level("\u{000B}\u{06ab}\u{0004}\u{0024}"),
            Some(ConfusionLevel::Satisfactory)
        );
    }

    #[test]
    fn string_with_symbols_etc_is_delightful() {
        assert_eq!(
            confusion_level("\u{0007}\u{0000}\u{1F4A9}\u{2208}a"),
            Some(ConfusionLevel::Delightful)
        );
    }

    /// Maps every code point of `input` to its script, in order.
    fn scripts(input: &str) -> Vec<Script> {
        input.chars().map(script).collect()
    }
}