assemblyline_models/types/
strings.rs

1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::marker::PhantomData;
4use std::str::FromStr;
5use std::sync::LazyLock;
6
7use idna::domain_to_ascii;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use serde_with::{DeserializeFromStr, SerializeDisplay};
11use struct_metadata::Described;
12use unicode_normalization::UnicodeNormalization;
13use constcat::concat;
14
15use crate::types::net_static::TLDS_SPECIAL_BY_DOMAIN;
16use crate::{ElasticMeta, ModelError};
17
18
/// A permanently interned string that holds the name or category of a service.
/// Can be dereferenced to the underlying string.
/// Here intern gives us fast comparison and copy with the cost of unreclaimable memory.
#[derive(Debug, Copy, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ServiceName(internment::Intern<String>);
24
impl std::fmt::Display for ServiceName {
    /// Formats the service name by forwarding to the `fmt` of the wrapped interned value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}
30
31impl std::ops::Deref for ServiceName {
32    type Target = str;
33
34    fn deref(&self) -> &Self::Target {
35        &self.0
36    }
37}
38
39
40impl<'de> Deserialize<'de> for ServiceName {
41    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
42    where
43        D: serde::Deserializer<'de> 
44    {
45        struct Visitor {}
46
47        impl<'de> serde::de::Visitor<'de> for Visitor {
48            type Value = ServiceName;
49
50            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
51                write!(formatter, "expected a string holding a service name or catagory")
52            }
53
54            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
55            where
56                E: serde::de::Error,
57            {
58                Ok(ServiceName(internment::Intern::from_ref(s)))
59            }
60        }
61        deserializer.deserialize_str(Visitor{})
62    }
63}
64
65impl Described<ElasticMeta> for ServiceName {
66    fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
67        String::metadata()
68    }
69}
70
71impl ServiceName {
72    pub fn from_string(value: String) -> Self {
73        Self(internment::Intern::new(value))
74    }
75}
76
77impl From<&str> for ServiceName {
78    fn from(value: &str) -> Self {
79        Self(internment::Intern::from_ref(value))
80    }
81}
82
/// A string that maps to a keyword field in elasticsearch.
///
/// This is the default behaviour for a String in a mapped struct. The only reason
/// to use this over a standard String is in cases where the 'mapping' field has been
/// overwritten by a container and the more explicit 'mapping' this provides is needed
/// to reassert the keyword type.
///
/// Example:
///         #[metadata(store=false, mapping="flattenedobject")]
///         pub safelisted_tags: HashMap<String, Vec<Keyword>>,
///
/// In that example, if the inner Keyword was String the entire HashMap would have its
/// mapping set to 'flattenedobject'; the inner Keyword more explicitly overrides this.
#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="keyword")]
pub struct Keyword(String);
100
101impl std::fmt::Display for Keyword {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        f.write_str(&self.0)
104    }
105}
106
107impl std::ops::Deref for Keyword {
108    type Target = str;
109
110    fn deref(&self) -> &Self::Target {
111        &self.0
112    }
113}
114
115impl From<String> for Keyword {
116    fn from(s: String) -> Self {
117        Keyword(s)
118    }
119}
120
121impl From<&str> for Keyword {
122    fn from(s: &str) -> Self {
123        Keyword(s.to_string())
124    }
125}
126
127
/// A string newtype whose metadata mapping is declared as `wildcard`.
#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="wildcard")]
pub struct Wildcard(String);
132
133impl std::fmt::Display for Wildcard {
134    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135        f.write_str(&self.0)
136    }
137}
138
139impl std::ops::Deref for Wildcard {
140    type Target = str;
141
142    fn deref(&self) -> &Self::Target {
143        &self.0
144    }
145}
146
147impl From<String> for Wildcard {
148    fn from(s: String) -> Self {
149        Wildcard(s)
150    }
151}
152
153impl From<&str> for Wildcard {
154    fn from(s: &str) -> Self {
155        Wildcard(s.to_string())
156    }
157}
158
159
/// Uppercase String
///
/// The `FromStr` and `From<&str>` conversions in this file trim surrounding
/// whitespace and uppercase the input before storing it. Serialization goes
/// through `Display` and deserialization through `FromStr`.
#[derive(Debug, SerializeDisplay, DeserializeFromStr, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[metadata_type(ElasticMeta)]
pub struct UpperString(String);
164
165
166impl std::fmt::Display for UpperString {
167    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168        f.write_str(&self.0)
169    }
170}
171
172impl std::ops::Deref for UpperString {
173    type Target = str;
174
175    fn deref(&self) -> &Self::Target {
176        &self.0
177    }
178}
179
180impl std::str::FromStr for UpperString {
181    type Err = ModelError;
182
183    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
184        let value = s.trim().to_uppercase();
185        Ok(UpperString(value))
186    }
187}
188
189impl From<&str> for UpperString {
190    fn from(s: &str) -> Self {
191        let value = s.trim().to_uppercase();
192        UpperString(value)
193    }
194}
195
196impl PartialEq<&str> for UpperString {
197    fn eq(&self, other: &&str) -> bool {
198        self.0.eq(other)
199    }
200}
201
202
/// A string newtype whose metadata mapping is declared as `text`.
/// The inner string is public and can be accessed directly.
#[derive(Serialize, Deserialize, Described, PartialEq, Eq, Debug, Clone, Default)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="text")]
pub struct Text(pub String);
207
208impl From<&str> for Text {
209    fn from(value: &str) -> Self {
210        Self(value.to_owned())
211    }
212}
213
214impl From<String> for Text {
215    fn from(value: String) -> Self {
216        Self(value)
217    }
218}
219
220impl From<Text> for String {
221    fn from(value: Text) -> String {
222        value.0
223    }
224}
225
226impl std::fmt::Display for Text {
227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228        f.write_str(&self.0)
229    }
230}
231
232impl Text {
233    pub fn as_str(&self) -> &str {
234        self.0.as_str()
235    }
236}
237
/// Error produced when a string fails one of the `StringValidator` checks.
#[derive(Debug, thiserror::Error)]
#[error("Could not process {original} as a {name}: {error}")]
pub struct ValidationError {
    /// The input exactly as it was handed to the validator.
    original: String, 
    /// Short name of the kind of value that was expected (e.g. "domain").
    name: &'static str, 
    /// Rendered description of why the input was rejected.
    error: String
}
245
246
/// A stateless check-and-normalize step applied to a string.
///
/// Implementors return the accepted (possibly rewritten) value, or a
/// `ValidationError` describing why the input was rejected. The `Cow` return
/// lets an implementation hand back the input unchanged without allocating.
pub trait StringValidator {
    /// Validates `data`, returning the value that should be stored on success.
    fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError>;
}
250
251
/// A string tagged at the type level with the validator used to check it.
///
/// The `Validator` parameter is only a marker (held through `PhantomData`);
/// no validator value is stored.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ValidatedString<Validator> {
    /// The stored string, as returned by the validator.
    value: String,
    /// Zero-sized marker tying this value to its validator type.
    validator: PhantomData<Validator>
}
257
258impl<Validator> std::fmt::Display for ValidatedString<Validator> {
259    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
260        f.write_str(&self.value)
261    }
262}
263
264impl<Validator> std::ops::Deref for ValidatedString<Validator> {
265    type Target = str;
266
267    fn deref(&self) -> &Self::Target {
268        &self.value
269    }
270}
271
272impl<Validator> Described<ElasticMeta> for ValidatedString<Validator> {
273    fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
274        String::metadata()
275    }
276}
277
278impl<Validator: StringValidator> Serialize for ValidatedString<Validator> {
279    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
280    where
281        S: serde::Serializer {
282        self.value.serialize(serializer)
283    }
284}
285
286impl<'de, Validator: StringValidator> Deserialize<'de> for ValidatedString<Validator> {
287    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
288    where
289        D: serde::Deserializer<'de> {
290        let value = String::deserialize(deserializer)?;
291        match Validator::validate(&value) {
292            Ok(value) => Ok(Self { value: value.to_string(), validator: PhantomData}),
293            Err(error) => Err(serde::de::Error::custom(error.to_string())),
294        }
295    }
296}
297
298impl<Validator: StringValidator> FromStr for ValidatedString<Validator> {
299    type Err = ValidationError;
300
301    fn from_str(s: &str) -> Result<Self, Self::Err> {
302        match check_domain(s) {
303            Ok(value) => Ok(Self { value, validator: PhantomData}),
304            Err(err) => Err(ValidationError {
305                original: s.to_owned(),
306                name: "domain",
307                error: format!("Domain rejected: {err:?}"),
308            }),
309        }
310    }
311}
312
313// MARK: Domains
314
/// Reasons `check_domain` can reject its input.
#[derive(Debug, thiserror::Error)]
pub enum DomainError {
    /// The input was the empty string.
    #[error("An empty string was provided where a domain was expected")]
    Empty,
    /// The input contained no `.` separator.
    #[error("No top level domain name found")]
    NoDot,
    /// A label starting with `xn--` could not be decoded.
    #[error("An invalid IDNA string was found")]
    InvalidIDNA,
    /// A character that is not allowed in a domain was found.
    #[error("Illigal characters were found")]
    IlligalCharacter,
    /// The top level domain is not one of the accepted ones.
    #[error("The top level domain was rejected")]
    InvalidTLD,
    /// The normalized value did not match the domain pattern.
    #[error("Input failed validation")]
    Validation,
}
330
/// Unanchored pattern for a domain name: one or more dot-terminated labels followed
/// by a final label of at least two characters, optionally prefixed with `xn--` and
/// optionally followed by a trailing dot.
const DOMAIN_REGEX: &str = r"(?:(?:[A-Za-z0-9\u00a1-\U0010ffff][A-Za-z0-9\u00a1-\U0010ffff_-]{0,62})?[A-Za-z0-9\u00a1-\U0010ffff]\.)+(?:[Xx][Nn]--)?(?:[A-Za-z0-9\u00a1-\U0010ffff]{2,}\.?)";
/// `DOMAIN_REGEX` anchored at both ends, so the whole input must be a domain.
const DOMAIN_ONLY_REGEX: &str = concat!("^", DOMAIN_REGEX, "$");
/// Characters that must not be introduced into a label by Unicode normalization.
const DOMAIN_EXCLUDED_NORM_CHARS: &str = "./?@#";
334
335
336fn is_domain_excluded_char(item: char) -> bool {
337    DOMAIN_EXCLUDED_NORM_CHARS.contains(item)
338    // todo!()
339    // self.excluded_chars = set(DOMAIN_EXCLUDED_NORM_CHARS)
340}
341
342pub fn check_domain(data: &str) -> Result<String, DomainError> {
343
344    if data.is_empty() {
345        return Err(DomainError::Empty)
346    }
347
348    let data = data.replace('\u{3002}', ".");
349
350    if !data.contains('.') {
351        return Err(DomainError::NoDot)
352    }
353
354    let mut normalized_parts = vec![];
355    for segment in data.split('.'){
356        if segment.is_ascii() {
357            let segment = segment.to_ascii_lowercase();
358            if segment.starts_with("xn--") {
359                let (domain, error) = idna::domain_to_unicode(&segment);
360                if error.is_err() {
361                    return Err(DomainError::InvalidIDNA)
362                }
363                normalized_parts.push(domain);
364                continue
365            }
366            normalized_parts.push(segment);
367        } else {
368            let segment_norm = segment.nfkc().collect::<String>();
369            // segment_norm = unicodedata.normalize('NFKC', segment)
370            if segment != segment_norm && segment_norm.chars().any(is_domain_excluded_char) {
371                return Err(DomainError::IlligalCharacter)
372                // raise ValueError(f"[{self.name or self.parent_name}] '{segment}' in '{value}' "
373                //                     f"includes a Unicode character that can not be normalized to '{segment_norm}'.")
374            } 
375            normalized_parts.push(segment_norm);
376        }
377    }
378    
379    let mut domain = normalized_parts.join(".");
380
381    static VALIDATION_REGEX: LazyLock<Regex> = LazyLock::new(||{
382        Regex::new(DOMAIN_ONLY_REGEX).expect("Error in static domain only regex")
383    });
384    if !VALIDATION_REGEX.is_match(&domain){
385        return Err(DomainError::Validation)
386        // raise ValueError(f"[{self.name or self.parent_name}] '{domain}' not match the "
387        //                     f"validator: {self.validation_regex.pattern}")
388    }
389    while let Some(new_domain) = domain.strip_suffix(".") {
390        domain = new_domain.to_owned();
391    }
392
393    if domain.contains("@") {
394        return Err(DomainError::IlligalCharacter)   
395    }
396
397    if let Some((_, tld)) = domain.rsplit_once(".") {
398        let mut tld = tld.to_uppercase();
399        if !tld.is_ascii() {
400            tld = match domain_to_ascii(&tld) {
401                Ok(tld) => tld.to_ascii_uppercase(),
402                Err(_) => return Err(DomainError::InvalidTLD),
403            };
404        }
405
406        let domain = domain.to_uppercase();
407        let combined_tlds = find_top_level_domains();
408        if combined_tlds.contains(&tld) || TLDS_SPECIAL_BY_DOMAIN.iter().any(|d| domain.ends_with(d)) {
409            return Ok(domain.to_lowercase())
410        }
411    }
412
413    Err(DomainError::InvalidTLD)
414} 
415
416// def is_valid_domain(domain: str) -> bool:
417//     if "@" in domain:
418//         return False
419
420//     if "." in domain:
421//         domain = domain.upper()
422//         tld = domain.split(".")[-1]
423//         if not tld.isascii():
424//             try:
425//                 tld = tld.encode('idna').decode('ascii').upper()
426//             except ValueError:
427//                 return False
428
429//         combined_tlds = find_top_level_domains()
430//         if tld in combined_tlds:
431//             # Single term TLD check
432//             return True
433
434//         elif any(domain.endswith(d) for d in TLDS_SPECIAL_BY_DOMAIN):
435//             # Multi-term TLD check
436//             return True
437
438//     return False
439
440
/// Reads the `SYSTEM_LOCAL_TLD` environment variable as a `;`-separated list.
///
/// Entries are trimmed of surrounding whitespace and empty entries are dropped.
/// An unset variable yields an empty list.
///
/// # Panics
/// Panics when the variable is set but is not valid unicode.
fn system_local_tld() -> Vec<String> {
    let raw_tlds = match std::env::var("SYSTEM_LOCAL_TLD") {
        Ok(listed) => listed,
        Err(std::env::VarError::NotPresent) => String::new(),
        Err(std::env::VarError::NotUnicode(_)) => {
            panic!("SYSTEM_LOCAL_TLD contains non unicode data")
        }
    };

    raw_tlds
        .split(';')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .map(str::to_owned)
        .collect()
}
459
460/// Combine (once and memoize) the three different sources of TLD.
461fn find_top_level_domains() -> &'static HashSet<String> {
462    static TLDS: LazyLock<HashSet<String>> = LazyLock::new(|| {
463        use super::net_static::TLDS_ALPHA_BY_DOMAIN;
464        let mut combined_tlds = HashSet::<String>::new(); 
465        combined_tlds.extend(TLDS_ALPHA_BY_DOMAIN.iter().map(|s|s.to_string()));
466
467        for d in TLDS_SPECIAL_BY_DOMAIN {
468            if !d.contains(".") {
469                combined_tlds.insert(d.to_owned());
470            }
471        } 
472
473        for tld in system_local_tld() {
474            let tld = tld.trim_matches('.').to_uppercase();
475            if !tld.is_empty() {
476                combined_tlds.insert(tld);
477            }
478        }
479
480        combined_tlds
481    });
482    &TLDS
483}
484
485pub struct DomainValidator;
486impl StringValidator for DomainValidator {
487    fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
488        match check_domain(data) {
489            Ok(domain) => Ok(domain.into()),
490            Err(err) => Err(ValidationError { original: data.to_string(), name: "domain", error: err.to_string() }),
491        }
492    }
493}
494
495/// validated domain string
496pub type Domain = ValidatedString<DomainValidator>;
497
/// Checks `check_domain` against Unicode and `xn--` encoded inputs.
#[test]
fn internationalized_domains() {
    // A Unicode domain is returned unchanged.
    assert_eq!(check_domain("ουτοπία.δπθ.gr").unwrap(), "ουτοπία.δπθ.gr"); 
    // Encoded labels come back in their decoded form.
    assert_eq!(check_domain("xn--kxae4bafwg.xn--pxaix.gr").unwrap(), "ουτοπία.δπθ.gr");
    // The encoded prefix is recognized regardless of case, including in the TLD.
    assert_eq!(check_domain("site.XN--W4RS40L").unwrap(), "site.嘉里"); 
    // A single-character top level domain is rejected.
    assert!(check_domain("ουτοπία.δπθ.g").is_err()); 
    // An empty label is rejected.
    assert!(check_domain("ουτοπία..gr").is_err()); 
    assert!(check_domain("xn--kxae4bafwg.xn--pxaix.g").is_err());
    // An encoded label that does not decode is rejected.
    assert!(check_domain("xn--kxae4bafwg.xn--xaix.gr").is_err());
}
508
509// MARK: URI
// Used for finding URIs in a blob
/// Path, query or fragment part of a URI: one of `/`, `?`, `#` followed by
/// any run of non-whitespace characters. Not anchored.
const URI_PATH: &str = r"([/?#]\S*)";
/// Unanchored URI pattern: a scheme followed by `//`, optional userinfo ending in `@`,
/// a host (IP or domain), an optional port of one to five digits and an optional
/// `URI_PATH`. Capture group 1 is the whole URI, group 2 the host, group 3 the path.
const URI_REGEX: &str = concat!("((?:(?:[A-Za-z][A-Za-z0-9+.-]*:)//)(?:[^/?#\\s]+@)?(", IP_REGEX, "|", DOMAIN_REGEX, ")(?::\\d{1,5})?", URI_PATH, "?)");
// Used for direct matching
/// `URI_REGEX` anchored at both ends, so the whole input must be a URI.
const FULL_URI: &str = concat!("^", URI_REGEX, "$");
515
/// Reasons `check_uri` can reject its input.
#[derive(Debug, thiserror::Error)]
pub enum UriParseError{
    /// The input was the empty string.
    #[error("An empty string was provided as a URI")]
    Empty,
    /// The input (carried in the variant) did not match the URI pattern.
    #[error("The value {0} failed to match the URI validator")]
    Validator(String),
    /// The host was neither an accepted domain nor an IP address.
    /// Carries the full input followed by the rejected host.
    #[error("Suggested URI {0} failed domain validation {1}")]
    Domain(String, String)
}
525
526pub fn check_uri(value: &str) -> Result<String, UriParseError> {
527    if value.is_empty() {
528        return Err(UriParseError::Empty)
529    }
530
531    static FULL_URI_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
532        Regex::new(FULL_URI).expect("Error in uri regex")
533    });
534
535    let matches = match FULL_URI_VALIDATOR.captures(value) {
536        Some(matches) => matches,
537        None => return Err(UriParseError::Validator(value.to_owned()))
538    };
539
540    let host = match matches.get(2) {
541        Some(host) => host.as_str(),
542        None => return Err(UriParseError::Validator(value.to_owned()))
543    };
544
545    let uri = match matches.get(0) {
546        Some(uri) => uri.as_str(),
547        None => return Err(UriParseError::Validator(value.to_owned()))
548    };
549
550    match check_domain(host) {
551        Ok(domain) => Ok(uri.replace(host, &domain)),
552        Err(_) => if is_ip(host) {
553            Ok(uri.replace(host, &host.to_uppercase()))
554        } else {
555            Err(UriParseError::Domain(value.to_owned(), host.to_owned()))
556        },
557    }
558}
559
560pub struct UriValidator;
561impl StringValidator for UriValidator {
562    fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
563        match check_uri(data) {
564            Ok(data) => Ok(data.into()),
565            Err(err) => Err(ValidationError { original: data.to_string(), name: "uri", error: err.to_string() }),
566        }
567    }
568}
569
570
571// class URI(Keyword):
572//     def __init__(self, *args, **kwargs):
573//         super().__init__(*args, **kwargs)
574//         self.validation_regex = re.compile(FULL_URI)
575
576
/// Validated uri type: a string that has been accepted by `UriValidator`.
pub type Uri = ValidatedString<UriValidator>;
579
580// /// Unvalidated platform type
581// pub type Platform = String;
582
583// /// Unvalidated processor type
584// pub type Processor = String;
585
586// /// Unvalidated phone number type
587// pub type PhoneNumber = String;
588
589// /// Unvalidated MAC type
590// pub type Mac = String;
591
592// /// Unvalidated UNCPath type
593// pub type UNCPath = String;
594
595// /// Unvalidated UriPath type
596// pub type UriPath = String;
597
598// MARK: IP
599
/// Unanchored pattern for a dotted-quad IPv4 address with each octet in 0-255.
const IPV4_REGEX: &str = r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
/// Unanchored pattern for IPv6 addresses, written as an alternation of the
/// accepted textual forms (full, `::`-compressed, `fe80:` with a `%` zone suffix,
/// and forms ending in an embedded dotted-quad IPv4 address).
const IPV6_REGEX: &str = concat!(
    r"(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|",
    r"(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|",
    r"(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|",
    r"(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|",
    r":(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|",
    r"::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|",
    r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|",
    r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"
);
/// Either an IPv4 or an IPv6 address, wrapped in a non-capturing group.
const IP_REGEX: &str = concat!("(?:", IPV4_REGEX, "|", IPV6_REGEX, ")");
/// `IP_REGEX` anchored at both ends, so the whole input must be an address.
const IP_ONLY_REGEX: &str = concat!("^", IP_REGEX, "$");
613// const IPV4_ONLY_REGEX: &str = concat!("^", IPV4_REGEX, "$");
614// const IPV6_ONLY_REGEX: &str = concat!("^", IPV6_REGEX, "$");
615
616pub fn is_ip(value: &str) -> bool {
617    static IP: LazyLock<Regex> = LazyLock::new(|| {
618        Regex::new(IP_ONLY_REGEX).expect("IP Regex error")
619    });
620    IP.is_match(value)
621}
622
623// class IP(Keyword):
624//     def __init__(self, *args, allow_ipv6=True, allow_ipv4=True, **kwargs):
625//         super().__init__(*args, **kwargs)
626//         if allow_ipv4 and allow_ipv6:
627//             self.validation_regex = re.compile(IP_ONLY_REGEX)
628//         elif allow_ipv4:
629//             self.validation_regex = re.compile(IPV4_ONLY_REGEX)
630//         elif allow_ipv6:
631//             self.validation_regex = re.compile(IPV6_ONLY_REGEX)
632//         else:
633//             raise ValueError("IP type field should allow at least one of IPv4 or IPv6...")
634
635//     def check(self, value, **kwargs):
636//         if self.optional and value is None:
637//             return None
638
639//         if not value:
640//             return None
641
642//         if not self.validation_regex.match(value):
643//             raise ValueError(f"[{self.name or self.parent_name}] '{value}' not match the "
644//                              f"validator: {self.validation_regex.pattern}")
645
646//         # An additional check for type validation
647
648//         # IPv4
649//         if "." in value:
650//             return ".".join([str(int(x)) for x in value.split(".")])
651//         # IPv6
652//         else:
653//             return ":".join([str(x) for x in value.split(":")])
654
655
656// MARK: UNC Path
/// Decimal port number from 0 to 65535 without leading zeros, as one capture group.
const PORT_REGEX: &str = r"(0|[1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])";
/// Anchored pattern for a UNC path: two backslashes, a host of one to four
/// dot-separated labels, an optional `@SSL` marker, an optional `@port`, then one
/// or more backslash-prefixed path components of 1 to 64 characters, and any
/// number of trailing backslashes.
const UNC_PATH_REGEX: &str = concat!(
    r"^\\\\[a-zA-Z0-9\-_\s]{1,63}(?:\.[a-zA-Z0-9\-_\s]{1,63}){0,3}",
    "(?:@SSL)?(?:@", PORT_REGEX, ")?",
    r#"(?:\\[^\\\/\:\*\?"<>\|\r\n]{1,64})+\\*$"#
);
663
664pub fn is_unc_path(value: &str) -> bool {
665    static PARSER: LazyLock<Regex> = LazyLock::new(|| {
666        Regex::new(UNC_PATH_REGEX).expect("UNC path regex error")
667    });
668    PARSER.is_match(value)
669}
670
671// class UNCPath(ValidatedKeyword):
672//     def __init__(self, *args, **kwargs):
673//         super().__init__(UNC_PATH_REGEX, *args, **kwargs)
674
675
676// MARK: URI Path
677
678static URI_PATH_PARSER: LazyLock<Regex> = LazyLock::new(|| {
679    Regex::new(URI_PATH).expect("URI path regex error")
680});
681
682pub fn is_uri_path(value: &str) -> bool {
683    URI_PATH_PARSER.is_match(value)
684}
685
686// MARK: MAC
687
688const MAC_REGEX: &str = r"^(?:(?:[0-9a-f]{2}-){5}[0-9a-f]{2}|(?:[0-9a-f]{2}:){5}[0-9a-f]{2})$";
689
690static MAC_PARSER: LazyLock<Regex> = LazyLock::new(|| {
691    Regex::new(MAC_REGEX).expect("MAC regex error")
692});
693
694
695pub fn is_mac(value: &str) -> bool {
696    MAC_PARSER.is_match(value)
697}
698
699// MARK: Email
700pub struct EmailValidator;
701impl StringValidator for EmailValidator {
702    fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
703        match check_email(data) {
704            Ok(email) => Ok(email.into()),
705            Err(err) => Err(ValidationError { original: data.to_string(), name: "email", error: err.to_string() }),
706        }
707    }
708}
709
710/// validated Email string
711pub type Email = ValidatedString<EmailValidator>;
712
/// Reasons `check_email` can reject its input.
#[derive(Debug, thiserror::Error)]
pub enum EmailError {
    /// The input was the empty string.
    #[error("an empty string was provided where an email was expected")]
    Empty,
    /// The input (carried in the variant) did not match the email pattern.
    #[error("{0} did not match email validator")]
    Validation(String),
    /// The part after `@` (carried in the variant) failed domain validation.
    #[error("{0} is not a valid domain in an email")]
    Domain(String),
}
722
/// Anchored email pattern: a local part made of dot-separated runs of the allowed
/// characters, an `@`, then a domain matching `DOMAIN_REGEX` captured as group 1.
// NOTE(review): the character classes contain U+2018 (‘) where a backtick might be
// expected — confirm against the reference pattern before changing.
const EMAIL_REGEX: &str = concat!("^[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+)*@(", DOMAIN_REGEX, ")$");

/// Compiled form of `EMAIL_REGEX`, built once on first use.
static EMAIL_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(EMAIL_REGEX).expect("Error in email validator")
});
728
729pub fn check_email(email: &str) -> Result<String, EmailError> {
730    if email.is_empty() {
731        return Err(EmailError::Empty)
732    }
733
734    let matches = match EMAIL_VALIDATOR.captures(email) {
735        Some(matches) => matches,
736        None => return Err(EmailError::Validation(email.to_owned())),
737    };
738
739    match matches.get(1) {
740        Some(domain) => if check_domain(domain.as_str()).is_ok() {
741            Ok(email.to_lowercase())
742        } else {
743            Err(EmailError::Domain(domain.as_str().to_owned()))
744        },
745        None => Err(EmailError::Validation(email.to_owned()))
746    }
747}
748
749// MARK: Phone number
750
751const PHONE_REGEX: &str = r"^(\+?\d{1,2})?[ .-]?(\(\d{3}\)|\d{3})[ .-](\d{3})[ .-](\d{4})$";
752
753static PHONE_PARSER: LazyLock<Regex> = LazyLock::new(|| {
754    Regex::new(PHONE_REGEX).expect("Phone regex error")
755});
756
757
758pub fn is_phone_number(value: &str) -> bool {
759    PHONE_PARSER.is_match(value)
760}