unicode_security/
restriction_level.rs

//! Detection of the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
//! that a string conforms to.
3
4use crate::mixed_script::AugmentedScriptSet;
5use crate::GeneralSecurityProfile;
6use unicode_script::Script;
7
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from most restrictive to least restrictive. The derived
/// `PartialOrd`/`Ord` follow declaration order, so `a <= b` means "`a` is at least as
/// restrictive as `b`"; do not reorder the variants.
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
pub enum RestrictionLevel {
    /// [ASCII-Only](https://www.unicode.org/reports/tr39/#ascii_only):
    /// every character of the string is in the ASCII range.
    ASCIIOnly,
    /// [Single Script](https://www.unicode.org/reports/tr39/#single_script):
    /// all characters of the string share at least one script.
    SingleScript,
    /// [Highly Restrictive](https://www.unicode.org/reports/tr39/#highly_restrictive):
    /// Latin mixed with one of the CJK script combinations singled out by UTS #39.
    HighlyRestrictive,
    /// [Moderately Restrictive](https://www.unicode.org/reports/tr39/#moderately_restrictive):
    /// Latin mixed with a single other recommended script that is neither Cyrillic nor Greek.
    ModeratelyRestrictive,
    /// [Minimally Restrictive](https://www.unicode.org/reports/tr39/#minimally_restrictive):
    /// any mix of scripts, as long as every character is allowed in identifiers.
    MinimallyRestrictive,
    /// [Unrestricted](https://www.unicode.org/reports/tr39/#unrestricted):
    /// the string contains a character that is not allowed in identifiers.
    Unrestricted,
}
25
26/// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
27/// a string satisfies
28pub trait RestrictionLevelDetection: Sized {
29    /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
30    ///
31    /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
32    fn detect_restriction_level(self) -> RestrictionLevel;
33
34    /// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
35    ///
36    /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
37    fn check_restriction_level(self, level: RestrictionLevel) -> bool {
38        self.detect_restriction_level() <= level
39    }
40}
41
42impl RestrictionLevelDetection for &'_ str {
43    fn detect_restriction_level(self) -> RestrictionLevel {
44        let mut ascii_only = true;
45        let mut set = AugmentedScriptSet::default();
46        let mut exclude_latin_set = AugmentedScriptSet::default();
47        for ch in self.chars() {
48            if !GeneralSecurityProfile::identifier_allowed(ch) {
49                return RestrictionLevel::Unrestricted;
50            }
51            if !ch.is_ascii() {
52                ascii_only = false;
53            }
54            let ch_set = ch.into();
55            set.intersect_with(ch_set);
56            if !ch_set.base.contains_script(Script::Latin) {
57                exclude_latin_set.intersect_with(ch_set);
58            }
59        }
60
61        if ascii_only {
62            return RestrictionLevel::ASCIIOnly;
63        } else if !set.is_empty() {
64            return RestrictionLevel::SingleScript;
65        } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
66            return RestrictionLevel::HighlyRestrictive;
67        } else if exclude_latin_set.base.len() == 1 {
68            let script = exclude_latin_set.base.iter().next().unwrap();
69            if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
70                return RestrictionLevel::ModeratelyRestrictive;
71            }
72        }
73        return RestrictionLevel::MinimallyRestrictive;
74    }
75}