fog_pack/validator/
str.rs

1use super::*;
2use crate::element::*;
3use crate::error::{Error, Result};
4use regex::Regex;
5use serde::{Deserialize, Serialize};
6
/// Serde `skip_serializing_if` predicate: true when the flag is unset.
#[inline]
fn is_false(v: &bool) -> bool {
    !*v
}
11
/// Serde `skip_serializing_if` predicate: true when the value is zero.
#[inline]
fn u32_is_zero(v: &u32) -> bool {
    matches!(*v, 0)
}
16
/// Serde `skip_serializing_if` predicate: true when the value is `u32::MAX`.
#[inline]
fn u32_is_max(v: &u32) -> bool {
    matches!(*v, u32::MAX)
}
21
22#[inline]
23fn normalize_is_none(v: &Normalize) -> bool {
24    matches!(v, Normalize::None)
25}
26
27/// Validator for UTF-8 strings.
28///
29/// This validator type will only pass string values. Validation passes if:
30///
31/// - The value's length in bytes is less than or equal to the value in `max_len`.
32/// - The value's length in bytes is greater than or equal to the value in `min_len`.
33/// - The value's number of unicode characters is less than or equal to the value in `max_char`.
34/// - The value's number of unicode characters is greater than or equal to the value in `min_char`.
35/// - The value does not begin with any of the prefixes in the `ban_prefix` list.
36/// - The value does not end with any of the suffixes in the `ban_suffix` list.
37/// - The value does not contain any of the characters in the `ban_char` string.
38/// - If a regular expression is present in `matches`, the possibly-normalized value must match
39///     against the expression.
40/// - If the `in` list is not empty, the possibly-normalized value must be among the values in the list.
41/// - The possibly-normalized value must not be among the values in the `nin` list.
42///
43/// The `normalize` field may be set to `None`, `NFC`, or `NFKC`, corresponding to Unicode
44/// normalization forms. When checked for `in`, `nin`, `ban_prefix`, `ban_suffix`, `ban_char`, and
45/// `matches`, the value is first put into the selected normalization form, and any `in`, `nin`,
46/// `ban_prefix`, and `ban_suffix` list strings are normalized as well.
47///
48/// # Defaults
49///
50/// Fields that aren't specified for the validator use their defaults instead. The defaults for
51/// each field are:
52///
53/// - comment: ""
54/// - in_list: empty
55/// - nin_list: empty
56/// - matches: None
57/// - max_len: u32::MAX
58/// - min_len: 0
59/// - max_char: u32::MAX
60/// - min_char: 0
61/// - normalize: Normalize::None
62/// - ban_prefix: empty
63/// - ban_suffix: empty
64/// - ban_char: ""
65/// - query: false
66/// - regex: false
67/// - size: false
68///
69/// # Regular Expressions
70///
71/// Regular expressions can be set for StrValidator using the `matches` field, but should be used
72/// sparingly, and should generally be avoided if possible. If they must be used, be aware of their
73/// limitations due to their memory, computation, and general consistency issues.
74///
75/// Before you use regular expressions or try to work around the look-around limitations, consider
76/// whether or not your validation requirement can be fulfilled by using some combination of the
77/// `ban_prefix`, `ban_suffix`, `ban_char`, `in`, and `nin` fields.
78///
79/// Regular expression can rapidly use up a lot of memory when compiled. This is one of the reasons
80/// why it is inadvisable to accept and use unknown schemas without first checking for regexes. For
81/// queries, a schema will have some upper limit on the number of allowed regular expressions, in
82/// order to mitigate possible memory exhaustion.
83///
84/// Beyond their memory cost, regular expressions have a second problem: there's not really a
85/// universal standard for regular expressions; at least, not one that is rigidly followed in
86/// implementations. The Rust fog-pack library uses the [`regex`](https://crates.io/crates/regex)
87/// crate for regular expressions, supporting Perl-style expression syntax, unicode character
88/// classes, and flags for unicode support and case insensitivity. Look around and backreferences
89/// are *not* supported. It is hoped that other implementations will support the same syntax, with
90/// the same limitations on look around and backreferences.
91///
92/// Finally, because unicode support is enabled, it is possible to have a string that fails on one
93/// library version and succeeds on another due to Unicode versions changing their character class
94/// definitions. This is a corner case, but any schema writer should be aware of it as a
95/// possibility.
96///
97/// # Unicode NFC and NFKC
98///
99/// Unicode normalization can be tricky to get right. Strings are never required to be in a
100/// particular normalization form, as it may be that the creator or user of a string specifically
101/// wants no normalization, but a query or schema may desire it. To this end, normalization of the
102/// string being validated, as well as the `in` and `nin` lists' strings can all be done
103/// before running validation. This is settable through the `normalization` field, which can be
104/// `None`, `NFC`, or `NFKC`.
105///
106#[derive(Clone, Debug, Serialize, Deserialize)]
107#[serde(deny_unknown_fields, default)]
108pub struct StrValidator {
109    /// An optional comment explaining the validator.
110    #[serde(skip_serializing_if = "String::is_empty")]
111    pub comment: String,
112    /// A vector of specific allowed values, stored under the `in` field. If empty, this vector is not checked against.
113    #[serde(rename = "in", skip_serializing_if = "Vec::is_empty")]
114    pub in_list: Vec<String>,
115    /// A vector of specific unallowed values, stored under the `nin` field.
116    #[serde(rename = "nin", skip_serializing_if = "Vec::is_empty")]
117    pub nin_list: Vec<String>,
118    /// A regular expression that the value must match against.
119    #[serde(skip_serializing_if = "Option::is_none", with = "serde_regex")]
120    pub matches: Option<Box<Regex>>,
121    /// The maximum allowed number of bytes in the string value.
122    #[serde(skip_serializing_if = "u32_is_max")]
123    pub max_len: u32,
124    /// The minimum allowed number of bytes in the string value.
125    #[serde(skip_serializing_if = "u32_is_zero")]
126    pub min_len: u32,
127    /// The maximum allowed number of unicode characters in the string value.
128    #[serde(skip_serializing_if = "u32_is_max")]
129    pub max_char: u32,
130    /// The minimum allowed number of unicode characters in the string value.
131    #[serde(skip_serializing_if = "u32_is_zero")]
132    pub min_char: u32,
133    /// The Unicode normalization setting.
134    #[serde(skip_serializing_if = "normalize_is_none")]
135    pub normalize: Normalize,
136    /// Banned string prefixes.
137    #[serde(skip_serializing_if = "Vec::is_empty")]
138    pub ban_prefix: Vec<String>,
139    /// Banned string suffixes.
140    #[serde(skip_serializing_if = "Vec::is_empty")]
141    pub ban_suffix: Vec<String>,
142    /// Banned characters.
143    #[serde(skip_serializing_if = "String::is_empty")]
144    pub ban_char: String,
145    /// If true, queries against matching spots may have values in the `in` or `nin` lists.
146    #[serde(skip_serializing_if = "is_false")]
147    pub query: bool,
148    /// If true, queries against matching spots may use the `matches` value.
149    #[serde(skip_serializing_if = "is_false")]
150    pub regex: bool,
151    /// If true, queries against matching spots may set the `ban_prefix`, `ban_suffix`, and
152    /// `ban_char` values to non-defaults.
153    #[serde(skip_serializing_if = "is_false")]
154    pub ban: bool,
155    /// If true, queries against matching spots may set the `max_len`, `min_len`, `max_char`, and
156    /// `min_char` values to non-defaults.
157    #[serde(skip_serializing_if = "is_false")]
158    pub size: bool,
159}
160
161impl PartialEq for StrValidator {
162    fn eq(&self, rhs: &Self) -> bool {
163        (self.comment == rhs.comment)
164            && (self.in_list == rhs.in_list)
165            && (self.nin_list == rhs.nin_list)
166            && (self.max_len == rhs.max_len)
167            && (self.min_len == rhs.min_len)
168            && (self.max_char == rhs.max_char)
169            && (self.min_char == rhs.min_char)
170            && (self.normalize == rhs.normalize)
171            && (self.ban_prefix == rhs.ban_prefix)
172            && (self.ban_suffix == rhs.ban_suffix)
173            && (self.ban_char == rhs.ban_char)
174            && (self.query == rhs.query)
175            && (self.regex == rhs.regex)
176            && (self.size == rhs.size)
177            && (self.ban == rhs.ban)
178            && match (&self.matches, &rhs.matches) {
179                (None, None) => true,
180                (Some(_), None) => false,
181                (None, Some(_)) => false,
182                (Some(lhs), Some(rhs)) => lhs.as_str() == rhs.as_str(),
183            }
184    }
185}
186
187impl std::default::Default for StrValidator {
188    fn default() -> Self {
189        Self {
190            comment: String::new(),
191            in_list: Vec::new(),
192            nin_list: Vec::new(),
193            matches: None,
194            max_len: u32::MAX,
195            min_len: u32::MIN,
196            max_char: u32::MAX,
197            min_char: u32::MIN,
198            normalize: Normalize::None,
199            ban_prefix: Vec::new(),
200            ban_suffix: Vec::new(),
201            ban_char: String::new(),
202            query: false,
203            regex: false,
204            ban: false,
205            size: false,
206        }
207    }
208}
209
210impl StrValidator {
211    /// Make a new validator with the default configuration.
212    pub fn new() -> Self {
213        Self::default()
214    }
215
216    /// Set a comment for the validator.
217    pub fn comment(mut self, comment: impl Into<String>) -> Self {
218        self.comment = comment.into();
219        self
220    }
221
222    /// Set the maximum number of allowed bytes.
223    pub fn max_len(mut self, max_len: u32) -> Self {
224        self.max_len = max_len;
225        self
226    }
227
228    /// Set the minimum number of allowed bytes.
229    pub fn min_len(mut self, min_len: u32) -> Self {
230        self.min_len = min_len;
231        self
232    }
233
234    /// Set the maximum number of allowed characters.
235    pub fn max_char(mut self, max_char: u32) -> Self {
236        self.max_char = max_char;
237        self
238    }
239
240    /// Set the minimum number of allowed characters.
241    pub fn min_char(mut self, min_char: u32) -> Self {
242        self.min_char = min_char;
243        self
244    }
245
246    /// Set the unicode normalization form to use for `in`, `nin`, and `matches` checks.
247    pub fn normalize(mut self, normalize: Normalize) -> Self {
248        self.normalize = normalize;
249        self
250    }
251
252    /// Set the regular expression to check against.
253    pub fn matches(mut self, matches: Regex) -> Self {
254        self.matches = Some(Box::new(matches));
255        self
256    }
257
258    /// Add a value to the `in` list.
259    pub fn in_add(mut self, add: impl Into<String>) -> Self {
260        self.in_list.push(add.into());
261        self
262    }
263
264    /// Add a value to the `nin` list.
265    pub fn nin_add(mut self, add: impl Into<String>) -> Self {
266        self.nin_list.push(add.into());
267        self
268    }
269
270    /// Add a value to the `ban_prefix` list.
271    pub fn ban_prefix_add(mut self, add: impl Into<String>) -> Self {
272        self.ban_prefix.push(add.into());
273        self
274    }
275
276    /// Add a value to the `ban_suffix` list.
277    pub fn ban_suffix_add(mut self, add: impl Into<String>) -> Self {
278        self.ban_suffix.push(add.into());
279        self
280    }
281
282    /// Set the `ban_char` string.
283    pub fn ban_char(mut self, ban_char: impl Into<String>) -> Self {
284        self.ban_char = ban_char.into();
285        self
286    }
287
288    /// Set whether or not queries can use the `in` and `nin` lists.
289    pub fn query(mut self, query: bool) -> Self {
290        self.query = query;
291        self
292    }
293
294    /// Set whether or not queries can use the `matches` value.
295    pub fn regex(mut self, regex: bool) -> Self {
296        self.regex = regex;
297        self
298    }
299
300    /// Set whether or not queries can use the `ban_prefix`, `ban_suffix`, and `ban_char` values.
301    pub fn ban(mut self, ban: bool) -> Self {
302        self.ban = ban;
303        self
304    }
305
306    /// Set whether or not queries can use the `max_len`, `min_len`, `max_char`, and `min_char`
307    /// values.
308    pub fn size(mut self, ord: bool) -> Self {
309        self.size = ord;
310        self
311    }
312
313    /// Build this into a [`Validator`] enum.
314    pub fn build(self) -> Validator {
315        Validator::Str(Box::new(self))
316    }
317
318    pub(crate) fn validate(&self, parser: &mut Parser) -> Result<()> {
319        // Get element
320        let elem = parser
321            .next()
322            .ok_or_else(|| Error::FailValidate("expected a string".to_string()))??;
323        let val = if let Element::Str(v) = elem {
324            v
325        } else {
326            return Err(Error::FailValidate(format!(
327                "expected Str, got {}",
328                elem.name()
329            )));
330        };
331        self.validate_str(val)
332    }
333
334    pub(crate) fn validate_str(&self, val: &str) -> Result<()> {
335        // Length Checks
336        if (val.len() as u32) > self.max_len {
337            return Err(Error::FailValidate(
338                "String is longer than max_len".to_string(),
339            ));
340        }
341        if (val.len() as u32) < self.min_len {
342            return Err(Error::FailValidate(
343                "String is shorter than min_len".to_string(),
344            ));
345        }
346        if self.max_char < u32::MAX || self.min_char > 0 {
347            let len_char = bytecount::num_chars(val.as_bytes()) as u32;
348            if len_char > self.max_char {
349                return Err(Error::FailValidate(
350                    "String is longer than max_len".to_string(),
351                ));
352            }
353            if len_char < self.min_char {
354                return Err(Error::FailValidate(
355                    "String is shorter than min_len".to_string(),
356                ));
357            }
358        }
359
360        // Content checks
361        use unicode_normalization::{
362            is_nfc_quick, is_nfkc_quick, IsNormalized, UnicodeNormalization,
363        };
364        match self.normalize {
365            Normalize::None => {
366                if !self.in_list.is_empty() && !self.in_list.iter().any(|v| *v == val) {
367                    return Err(Error::FailValidate(
368                        "String is not on `in` list".to_string(),
369                    ));
370                }
371                if self.nin_list.iter().any(|v| *v == val) {
372                    return Err(Error::FailValidate("String is on `nin` list".to_string()));
373                }
374                if let Some(pre) = self.ban_prefix.iter().find(|v| val.starts_with(*v)) {
375                    return Err(Error::FailValidate(format!(
376                        "String begins with banned prefix {:?}",
377                        pre
378                    )));
379                }
380                if let Some(suf) = self.ban_suffix.iter().find(|v| val.ends_with(*v)) {
381                    return Err(Error::FailValidate(format!(
382                        "String ends with banned suffix {:?}",
383                        suf
384                    )));
385                }
386                if !self.ban_char.is_empty() {
387                    if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
388                        return Err(Error::FailValidate(format!(
389                            "String contains banned character {:?}",
390                            c
391                        )));
392                    }
393                }
394                if let Some(ref regex) = self.matches {
395                    if !regex.is_match(val) {
396                        return Err(Error::FailValidate(
397                            "String doesn't match regular expression".to_string(),
398                        ));
399                    }
400                }
401            }
402            Normalize::NFC => {
403                let temp_string: String;
404                let val = match is_nfc_quick(val.chars()) {
405                    IsNormalized::Yes => val,
406                    _ => {
407                        temp_string = val.nfc().collect::<String>();
408                        temp_string.as_str()
409                    }
410                };
411
412                if !self.in_list.is_empty() && !self.in_list.iter().any(|v| v.nfc().eq(val.chars()))
413                {
414                    return Err(Error::FailValidate(
415                        "NFC String is not on `in` list".to_string(),
416                    ));
417                }
418                if self.nin_list.iter().any(|v| v.nfc().eq(val.chars())) {
419                    return Err(Error::FailValidate(
420                        "NFC String is on `nin` list".to_string(),
421                    ));
422                }
423                if let Some(pre) = self
424                    .ban_prefix
425                    .iter()
426                    .find(|v| v.nfc().zip(val.chars()).all(|(vc, valc)| vc == valc))
427                {
428                    return Err(Error::FailValidate(format!(
429                        "NFC String begins with banned prefix {:?}",
430                        pre
431                    )));
432                }
433                if !self.ban_suffix.is_empty() {
434                    let mut temp = String::new();
435                    if self.ban_suffix.iter().any(|v| {
436                        temp.clear();
437                        temp.extend(v.nfc());
438                        val.ends_with(&temp)
439                    }) {
440                        return Err(Error::FailValidate(format!(
441                            "NFC String ends with banned suffix {:?}",
442                            temp
443                        )));
444                    }
445                }
446                if !self.ban_char.is_empty() {
447                    if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
448                        return Err(Error::FailValidate(format!(
449                            "NFC String contains banned character {:?}",
450                            c
451                        )));
452                    }
453                }
454                if let Some(ref regex) = self.matches {
455                    if !regex.is_match(val) {
456                        return Err(Error::FailValidate(
457                            "String doesn't match regular expression".to_string(),
458                        ));
459                    }
460                }
461            }
462            Normalize::NFKC => {
463                let temp_string: String;
464                let val = match is_nfkc_quick(val.chars()) {
465                    IsNormalized::Yes => val,
466                    _ => {
467                        temp_string = val.nfkc().collect::<String>();
468                        temp_string.as_str()
469                    }
470                };
471
472                if !self.in_list.is_empty()
473                    && !self.in_list.iter().any(|v| v.nfkc().eq(val.chars()))
474                {
475                    return Err(Error::FailValidate(
476                        "NFKC String is not on `in` list".to_string(),
477                    ));
478                }
479                if self.nin_list.iter().any(|v| v.nfkc().eq(val.chars())) {
480                    return Err(Error::FailValidate(
481                        "NFKC String is on `nin` list".to_string(),
482                    ));
483                }
484                if let Some(pre) = self
485                    .ban_prefix
486                    .iter()
487                    .find(|v| v.nfkc().zip(val.chars()).all(|(vc, valc)| vc == valc))
488                {
489                    return Err(Error::FailValidate(format!(
490                        "NFKC String begins with banned prefix {:?}",
491                        pre
492                    )));
493                }
494                if !self.ban_suffix.is_empty() {
495                    let mut temp = String::new();
496                    if self.ban_suffix.iter().any(|v| {
497                        temp.clear();
498                        temp.extend(v.nfkc());
499                        val.ends_with(&temp)
500                    }) {
501                        return Err(Error::FailValidate(format!(
502                            "NFKC String ends with banned suffix {:?}",
503                            temp
504                        )));
505                    }
506                }
507                if !self.ban_char.is_empty() {
508                    if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
509                        return Err(Error::FailValidate(format!(
510                            "NFKC String contains banned character {:?}",
511                            c
512                        )));
513                    }
514                }
515                if let Some(ref regex) = self.matches {
516                    if !regex.is_match(val) {
517                        return Err(Error::FailValidate(
518                            "NFKC String doesn't match regular expression".to_string(),
519                        ));
520                    }
521                }
522            }
523        }
524        Ok(())
525    }
526
527    pub(crate) fn query_check_str(&self, other: &Self) -> bool {
528        (self.query || (other.in_list.is_empty() && other.nin_list.is_empty()))
529            && (self.regex || other.matches.is_none())
530            && (self.ban
531                || (other.ban_prefix.is_empty()
532                    && other.ban_suffix.is_empty()
533                    && other.ban_char.is_empty()))
534            && (self.size
535                || (u32_is_max(&other.max_len)
536                    && u32_is_zero(&other.min_len)
537                    && u32_is_max(&other.max_char)
538                    && u32_is_zero(&other.min_char)))
539    }
540
541    pub(crate) fn query_check(&self, other: &Validator) -> bool {
542        match other {
543            Validator::Str(other) => self.query_check_str(other),
544            Validator::Multi(list) => list.iter().all(|other| match other {
545                Validator::Str(other) => self.query_check_str(other),
546                _ => false,
547            }),
548            Validator::Any => true,
549            _ => false,
550        }
551    }
552}