fog_pack/validator/str.rs
1use super::*;
2use crate::element::*;
3use crate::error::{Error, Result};
4use regex::Regex;
5use serde::{Deserialize, Serialize};
6
/// Predicate for serde's `skip_serializing_if`: returns `true` when the flag is `false`.
///
/// Takes a reference because serde hands the field to the predicate by reference.
#[inline]
fn is_false(v: &bool) -> bool {
    matches!(*v, false)
}
11
/// Predicate for serde's `skip_serializing_if`: returns `true` when the value is `0`.
#[inline]
fn u32_is_zero(v: &u32) -> bool {
    matches!(*v, 0)
}
16
/// Predicate for serde's `skip_serializing_if`: returns `true` when the value is `u32::MAX`.
#[inline]
fn u32_is_max(v: &u32) -> bool {
    matches!(*v, u32::MAX)
}
21
22#[inline]
23fn normalize_is_none(v: &Normalize) -> bool {
24 matches!(v, Normalize::None)
25}
26
/// Validator for UTF-8 strings.
///
/// This validator type will only pass string values. Validation passes if:
///
/// - The value's length in bytes is less than or equal to the value in `max_len`.
/// - The value's length in bytes is greater than or equal to the value in `min_len`.
/// - The value's number of unicode characters is less than or equal to the value in `max_char`.
/// - The value's number of unicode characters is greater than or equal to the value in `min_char`.
/// - The value does not begin with any of the prefixes in the `ban_prefix` list.
/// - The value does not end with any of the suffixes in the `ban_suffix` list.
/// - The value does not contain any of the characters in the `ban_char` string.
/// - If a regular expression is present in `matches`, the possibly-normalized value must match
///   against the expression.
/// - If the `in` list is not empty, the possibly-normalized value must be among the values in the list.
/// - The possibly-normalized value must not be among the values in the `nin` list.
///
/// The `normalize` field may be set to `None`, `NFC`, or `NFKC`, corresponding to Unicode
/// normalization forms. When checked for `in`, `nin`, `ban_prefix`, `ban_suffix`, `ban_char`, and
/// `matches`, the value is first put into the selected normalization form, and any `in`, `nin`,
/// `ban_prefix`, and `ban_suffix` list strings are normalized as well.
///
/// # Defaults
///
/// Fields that aren't specified for the validator use their defaults instead. The defaults for
/// each field are:
///
/// - comment: ""
/// - in_list: empty
/// - nin_list: empty
/// - matches: None
/// - max_len: u32::MAX
/// - min_len: 0
/// - max_char: u32::MAX
/// - min_char: 0
/// - normalize: Normalize::None
/// - ban_prefix: empty
/// - ban_suffix: empty
/// - ban_char: ""
/// - query: false
/// - regex: false
/// - ban: false
/// - size: false
///
/// # Regular Expressions
///
/// Regular expressions can be set for StrValidator using the `matches` field, but should be used
/// sparingly, and should generally be avoided if possible. If they must be used, be aware of their
/// limitations due to their memory, computation, and general consistency issues.
///
/// Before you use regular expressions or try to work around the look-around limitations, consider
/// whether or not your validation requirement can be fulfilled by using some combination of the
/// `ban_prefix`, `ban_suffix`, `ban_char`, `in`, and `nin` fields.
///
/// Regular expressions can rapidly use up a lot of memory when compiled. This is one of the
/// reasons why it is inadvisable to accept and use unknown schemas without first checking for
/// regexes. For queries, a schema will have some upper limit on the number of allowed regular
/// expressions, in order to mitigate possible memory exhaustion.
///
/// Beyond their memory cost, regular expressions have a second problem: there's not really a
/// universal standard for regular expressions; at least, not one that is rigidly followed in
/// implementations. The Rust fog-pack library uses the [`regex`](https://crates.io/crates/regex)
/// crate for regular expressions, supporting Perl-style expression syntax, unicode character
/// classes, and flags for unicode support and case insensitivity. Look-around and backreferences
/// are *not* supported. It is hoped that other implementations will support the same syntax, with
/// the same limitations on look-around and backreferences.
///
/// Finally, because unicode support is enabled, it is possible to have a string that fails on one
/// library version and succeeds on another due to Unicode versions changing their character class
/// definitions. This is a corner case, but any schema writer should be aware of it as a
/// possibility.
///
/// # Unicode NFC and NFKC
///
/// Unicode normalization can be tricky to get right. Strings are never required to be in a
/// particular normalization form, as it may be that the creator or user of a string specifically
/// wants no normalization, but a query or schema may desire it. To this end, normalization of the
/// string being validated, as well as the strings in the `in`, `nin`, `ban_prefix`, and
/// `ban_suffix` lists, can all be done before running validation. This is settable through the
/// `normalize` field, which can be `None`, `NFC`, or `NFKC`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct StrValidator {
    /// An optional comment explaining the validator.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub comment: String,
    /// A vector of specific allowed values, stored under the `in` field. If empty, this vector is not checked against.
    #[serde(rename = "in", skip_serializing_if = "Vec::is_empty")]
    pub in_list: Vec<String>,
    /// A vector of specific unallowed values, stored under the `nin` field.
    #[serde(rename = "nin", skip_serializing_if = "Vec::is_empty")]
    pub nin_list: Vec<String>,
    /// A regular expression that the (possibly-normalized) value must match against.
    #[serde(skip_serializing_if = "Option::is_none", with = "serde_regex")]
    pub matches: Option<Box<Regex>>,
    /// The maximum allowed number of bytes in the string value.
    #[serde(skip_serializing_if = "u32_is_max")]
    pub max_len: u32,
    /// The minimum allowed number of bytes in the string value.
    #[serde(skip_serializing_if = "u32_is_zero")]
    pub min_len: u32,
    /// The maximum allowed number of unicode characters in the string value.
    #[serde(skip_serializing_if = "u32_is_max")]
    pub max_char: u32,
    /// The minimum allowed number of unicode characters in the string value.
    #[serde(skip_serializing_if = "u32_is_zero")]
    pub min_char: u32,
    /// The Unicode normalization setting, applied before the content checks (`in`, `nin`,
    /// `ban_prefix`, `ban_suffix`, `ban_char`, and `matches`).
    #[serde(skip_serializing_if = "normalize_is_none")]
    pub normalize: Normalize,
    /// Banned string prefixes.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub ban_prefix: Vec<String>,
    /// Banned string suffixes.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub ban_suffix: Vec<String>,
    /// Banned characters. Each character of this string is banned individually.
    #[serde(skip_serializing_if = "String::is_empty")]
    pub ban_char: String,
    /// If true, queries against matching spots may have values in the `in` or `nin` lists.
    #[serde(skip_serializing_if = "is_false")]
    pub query: bool,
    /// If true, queries against matching spots may use the `matches` value.
    #[serde(skip_serializing_if = "is_false")]
    pub regex: bool,
    /// If true, queries against matching spots may set the `ban_prefix`, `ban_suffix`, and
    /// `ban_char` values to non-defaults.
    #[serde(skip_serializing_if = "is_false")]
    pub ban: bool,
    /// If true, queries against matching spots may set the `max_len`, `min_len`, `max_char`, and
    /// `min_char` values to non-defaults.
    #[serde(skip_serializing_if = "is_false")]
    pub size: bool,
}
160
161impl PartialEq for StrValidator {
162 fn eq(&self, rhs: &Self) -> bool {
163 (self.comment == rhs.comment)
164 && (self.in_list == rhs.in_list)
165 && (self.nin_list == rhs.nin_list)
166 && (self.max_len == rhs.max_len)
167 && (self.min_len == rhs.min_len)
168 && (self.max_char == rhs.max_char)
169 && (self.min_char == rhs.min_char)
170 && (self.normalize == rhs.normalize)
171 && (self.ban_prefix == rhs.ban_prefix)
172 && (self.ban_suffix == rhs.ban_suffix)
173 && (self.ban_char == rhs.ban_char)
174 && (self.query == rhs.query)
175 && (self.regex == rhs.regex)
176 && (self.size == rhs.size)
177 && (self.ban == rhs.ban)
178 && match (&self.matches, &rhs.matches) {
179 (None, None) => true,
180 (Some(_), None) => false,
181 (None, Some(_)) => false,
182 (Some(lhs), Some(rhs)) => lhs.as_str() == rhs.as_str(),
183 }
184 }
185}
186
187impl std::default::Default for StrValidator {
188 fn default() -> Self {
189 Self {
190 comment: String::new(),
191 in_list: Vec::new(),
192 nin_list: Vec::new(),
193 matches: None,
194 max_len: u32::MAX,
195 min_len: u32::MIN,
196 max_char: u32::MAX,
197 min_char: u32::MIN,
198 normalize: Normalize::None,
199 ban_prefix: Vec::new(),
200 ban_suffix: Vec::new(),
201 ban_char: String::new(),
202 query: false,
203 regex: false,
204 ban: false,
205 size: false,
206 }
207 }
208}
209
210impl StrValidator {
211 /// Make a new validator with the default configuration.
212 pub fn new() -> Self {
213 Self::default()
214 }
215
216 /// Set a comment for the validator.
217 pub fn comment(mut self, comment: impl Into<String>) -> Self {
218 self.comment = comment.into();
219 self
220 }
221
222 /// Set the maximum number of allowed bytes.
223 pub fn max_len(mut self, max_len: u32) -> Self {
224 self.max_len = max_len;
225 self
226 }
227
228 /// Set the minimum number of allowed bytes.
229 pub fn min_len(mut self, min_len: u32) -> Self {
230 self.min_len = min_len;
231 self
232 }
233
234 /// Set the maximum number of allowed characters.
235 pub fn max_char(mut self, max_char: u32) -> Self {
236 self.max_char = max_char;
237 self
238 }
239
240 /// Set the minimum number of allowed characters.
241 pub fn min_char(mut self, min_char: u32) -> Self {
242 self.min_char = min_char;
243 self
244 }
245
246 /// Set the unicode normalization form to use for `in`, `nin`, and `matches` checks.
247 pub fn normalize(mut self, normalize: Normalize) -> Self {
248 self.normalize = normalize;
249 self
250 }
251
252 /// Set the regular expression to check against.
253 pub fn matches(mut self, matches: Regex) -> Self {
254 self.matches = Some(Box::new(matches));
255 self
256 }
257
258 /// Add a value to the `in` list.
259 pub fn in_add(mut self, add: impl Into<String>) -> Self {
260 self.in_list.push(add.into());
261 self
262 }
263
264 /// Add a value to the `nin` list.
265 pub fn nin_add(mut self, add: impl Into<String>) -> Self {
266 self.nin_list.push(add.into());
267 self
268 }
269
270 /// Add a value to the `ban_prefix` list.
271 pub fn ban_prefix_add(mut self, add: impl Into<String>) -> Self {
272 self.ban_prefix.push(add.into());
273 self
274 }
275
276 /// Add a value to the `ban_suffix` list.
277 pub fn ban_suffix_add(mut self, add: impl Into<String>) -> Self {
278 self.ban_suffix.push(add.into());
279 self
280 }
281
282 /// Set the `ban_char` string.
283 pub fn ban_char(mut self, ban_char: impl Into<String>) -> Self {
284 self.ban_char = ban_char.into();
285 self
286 }
287
288 /// Set whether or not queries can use the `in` and `nin` lists.
289 pub fn query(mut self, query: bool) -> Self {
290 self.query = query;
291 self
292 }
293
294 /// Set whether or not queries can use the `matches` value.
295 pub fn regex(mut self, regex: bool) -> Self {
296 self.regex = regex;
297 self
298 }
299
300 /// Set whether or not queries can use the `ban_prefix`, `ban_suffix`, and `ban_char` values.
301 pub fn ban(mut self, ban: bool) -> Self {
302 self.ban = ban;
303 self
304 }
305
306 /// Set whether or not queries can use the `max_len`, `min_len`, `max_char`, and `min_char`
307 /// values.
308 pub fn size(mut self, ord: bool) -> Self {
309 self.size = ord;
310 self
311 }
312
313 /// Build this into a [`Validator`] enum.
314 pub fn build(self) -> Validator {
315 Validator::Str(Box::new(self))
316 }
317
318 pub(crate) fn validate(&self, parser: &mut Parser) -> Result<()> {
319 // Get element
320 let elem = parser
321 .next()
322 .ok_or_else(|| Error::FailValidate("expected a string".to_string()))??;
323 let val = if let Element::Str(v) = elem {
324 v
325 } else {
326 return Err(Error::FailValidate(format!(
327 "expected Str, got {}",
328 elem.name()
329 )));
330 };
331 self.validate_str(val)
332 }
333
334 pub(crate) fn validate_str(&self, val: &str) -> Result<()> {
335 // Length Checks
336 if (val.len() as u32) > self.max_len {
337 return Err(Error::FailValidate(
338 "String is longer than max_len".to_string(),
339 ));
340 }
341 if (val.len() as u32) < self.min_len {
342 return Err(Error::FailValidate(
343 "String is shorter than min_len".to_string(),
344 ));
345 }
346 if self.max_char < u32::MAX || self.min_char > 0 {
347 let len_char = bytecount::num_chars(val.as_bytes()) as u32;
348 if len_char > self.max_char {
349 return Err(Error::FailValidate(
350 "String is longer than max_len".to_string(),
351 ));
352 }
353 if len_char < self.min_char {
354 return Err(Error::FailValidate(
355 "String is shorter than min_len".to_string(),
356 ));
357 }
358 }
359
360 // Content checks
361 use unicode_normalization::{
362 is_nfc_quick, is_nfkc_quick, IsNormalized, UnicodeNormalization,
363 };
364 match self.normalize {
365 Normalize::None => {
366 if !self.in_list.is_empty() && !self.in_list.iter().any(|v| *v == val) {
367 return Err(Error::FailValidate(
368 "String is not on `in` list".to_string(),
369 ));
370 }
371 if self.nin_list.iter().any(|v| *v == val) {
372 return Err(Error::FailValidate("String is on `nin` list".to_string()));
373 }
374 if let Some(pre) = self.ban_prefix.iter().find(|v| val.starts_with(*v)) {
375 return Err(Error::FailValidate(format!(
376 "String begins with banned prefix {:?}",
377 pre
378 )));
379 }
380 if let Some(suf) = self.ban_suffix.iter().find(|v| val.ends_with(*v)) {
381 return Err(Error::FailValidate(format!(
382 "String ends with banned suffix {:?}",
383 suf
384 )));
385 }
386 if !self.ban_char.is_empty() {
387 if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
388 return Err(Error::FailValidate(format!(
389 "String contains banned character {:?}",
390 c
391 )));
392 }
393 }
394 if let Some(ref regex) = self.matches {
395 if !regex.is_match(val) {
396 return Err(Error::FailValidate(
397 "String doesn't match regular expression".to_string(),
398 ));
399 }
400 }
401 }
402 Normalize::NFC => {
403 let temp_string: String;
404 let val = match is_nfc_quick(val.chars()) {
405 IsNormalized::Yes => val,
406 _ => {
407 temp_string = val.nfc().collect::<String>();
408 temp_string.as_str()
409 }
410 };
411
412 if !self.in_list.is_empty() && !self.in_list.iter().any(|v| v.nfc().eq(val.chars()))
413 {
414 return Err(Error::FailValidate(
415 "NFC String is not on `in` list".to_string(),
416 ));
417 }
418 if self.nin_list.iter().any(|v| v.nfc().eq(val.chars())) {
419 return Err(Error::FailValidate(
420 "NFC String is on `nin` list".to_string(),
421 ));
422 }
423 if let Some(pre) = self
424 .ban_prefix
425 .iter()
426 .find(|v| v.nfc().zip(val.chars()).all(|(vc, valc)| vc == valc))
427 {
428 return Err(Error::FailValidate(format!(
429 "NFC String begins with banned prefix {:?}",
430 pre
431 )));
432 }
433 if !self.ban_suffix.is_empty() {
434 let mut temp = String::new();
435 if self.ban_suffix.iter().any(|v| {
436 temp.clear();
437 temp.extend(v.nfc());
438 val.ends_with(&temp)
439 }) {
440 return Err(Error::FailValidate(format!(
441 "NFC String ends with banned suffix {:?}",
442 temp
443 )));
444 }
445 }
446 if !self.ban_char.is_empty() {
447 if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
448 return Err(Error::FailValidate(format!(
449 "NFC String contains banned character {:?}",
450 c
451 )));
452 }
453 }
454 if let Some(ref regex) = self.matches {
455 if !regex.is_match(val) {
456 return Err(Error::FailValidate(
457 "String doesn't match regular expression".to_string(),
458 ));
459 }
460 }
461 }
462 Normalize::NFKC => {
463 let temp_string: String;
464 let val = match is_nfkc_quick(val.chars()) {
465 IsNormalized::Yes => val,
466 _ => {
467 temp_string = val.nfkc().collect::<String>();
468 temp_string.as_str()
469 }
470 };
471
472 if !self.in_list.is_empty()
473 && !self.in_list.iter().any(|v| v.nfkc().eq(val.chars()))
474 {
475 return Err(Error::FailValidate(
476 "NFKC String is not on `in` list".to_string(),
477 ));
478 }
479 if self.nin_list.iter().any(|v| v.nfkc().eq(val.chars())) {
480 return Err(Error::FailValidate(
481 "NFKC String is on `nin` list".to_string(),
482 ));
483 }
484 if let Some(pre) = self
485 .ban_prefix
486 .iter()
487 .find(|v| v.nfkc().zip(val.chars()).all(|(vc, valc)| vc == valc))
488 {
489 return Err(Error::FailValidate(format!(
490 "NFKC String begins with banned prefix {:?}",
491 pre
492 )));
493 }
494 if !self.ban_suffix.is_empty() {
495 let mut temp = String::new();
496 if self.ban_suffix.iter().any(|v| {
497 temp.clear();
498 temp.extend(v.nfkc());
499 val.ends_with(&temp)
500 }) {
501 return Err(Error::FailValidate(format!(
502 "NFKC String ends with banned suffix {:?}",
503 temp
504 )));
505 }
506 }
507 if !self.ban_char.is_empty() {
508 if let Some(c) = val.chars().find(|c| self.ban_char.contains(*c)) {
509 return Err(Error::FailValidate(format!(
510 "NFKC String contains banned character {:?}",
511 c
512 )));
513 }
514 }
515 if let Some(ref regex) = self.matches {
516 if !regex.is_match(val) {
517 return Err(Error::FailValidate(
518 "NFKC String doesn't match regular expression".to_string(),
519 ));
520 }
521 }
522 }
523 }
524 Ok(())
525 }
526
527 pub(crate) fn query_check_str(&self, other: &Self) -> bool {
528 (self.query || (other.in_list.is_empty() && other.nin_list.is_empty()))
529 && (self.regex || other.matches.is_none())
530 && (self.ban
531 || (other.ban_prefix.is_empty()
532 && other.ban_suffix.is_empty()
533 && other.ban_char.is_empty()))
534 && (self.size
535 || (u32_is_max(&other.max_len)
536 && u32_is_zero(&other.min_len)
537 && u32_is_max(&other.max_char)
538 && u32_is_zero(&other.min_char)))
539 }
540
541 pub(crate) fn query_check(&self, other: &Validator) -> bool {
542 match other {
543 Validator::Str(other) => self.query_check_str(other),
544 Validator::Multi(list) => list.iter().all(|other| match other {
545 Validator::Str(other) => self.query_check_str(other),
546 _ => false,
547 }),
548 Validator::Any => true,
549 _ => false,
550 }
551 }
552}