1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::marker::PhantomData;
4use std::str::FromStr;
5use std::sync::LazyLock;
6
7use idna::domain_to_ascii;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use serde_with::{DeserializeFromStr, SerializeDisplay};
11use struct_metadata::Described;
12use unicode_normalization::UnicodeNormalization;
13use constcat::concat;
14
15use crate::types::net_static::TLDS_SPECIAL_BY_DOMAIN;
16use crate::{ElasticMeta, ModelError};
17
18
19#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
33#[metadata_type(ElasticMeta)]
34#[metadata(mapping="keyword")]
35pub struct Keyword(String);
36
37impl std::fmt::Display for Keyword {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 f.write_str(&self.0)
40 }
41}
42
43impl std::ops::Deref for Keyword {
44 type Target = str;
45
46 fn deref(&self) -> &Self::Target {
47 &self.0
48 }
49}
50
51impl From<String> for Keyword {
52 fn from(s: String) -> Self {
53 Keyword(s)
54 }
55}
56
57impl From<&str> for Keyword {
58 fn from(s: &str) -> Self {
59 Keyword(s.to_string())
60 }
61}
62
63
64#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
65#[metadata_type(ElasticMeta)]
66#[metadata(mapping="wildcard")]
67pub struct Wildcard(String);
68
69impl std::fmt::Display for Wildcard {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 f.write_str(&self.0)
72 }
73}
74
75impl std::ops::Deref for Wildcard {
76 type Target = str;
77
78 fn deref(&self) -> &Self::Target {
79 &self.0
80 }
81}
82
83impl From<String> for Wildcard {
84 fn from(s: String) -> Self {
85 Wildcard(s)
86 }
87}
88
89impl From<&str> for Wildcard {
90 fn from(s: &str) -> Self {
91 Wildcard(s.to_string())
92 }
93}
94
95
96#[derive(Debug, SerializeDisplay, DeserializeFromStr, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
98#[metadata_type(ElasticMeta)]
99pub struct UpperString(String);
100
101
102impl std::fmt::Display for UpperString {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 f.write_str(&self.0)
105 }
106}
107
108impl std::ops::Deref for UpperString {
109 type Target = str;
110
111 fn deref(&self) -> &Self::Target {
112 &self.0
113 }
114}
115
116impl std::str::FromStr for UpperString {
117 type Err = ModelError;
118
119 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
120 let value = s.trim().to_uppercase();
121 Ok(UpperString(value))
122 }
123}
124
125impl From<&str> for UpperString {
126 fn from(s: &str) -> Self {
127 let value = s.trim().to_uppercase();
128 UpperString(value)
129 }
130}
131
132impl PartialEq<&str> for UpperString {
133 fn eq(&self, other: &&str) -> bool {
134 self.0.eq(other)
135 }
136}
137
138
139#[derive(Serialize, Deserialize, Described, PartialEq, Eq, Debug, Clone, Default)]
140#[metadata_type(ElasticMeta)]
141#[metadata(mapping="text")]
142pub struct Text(pub String);
143
144impl From<&str> for Text {
145 fn from(value: &str) -> Self {
146 Self(value.to_owned())
147 }
148}
149
150impl From<String> for Text {
151 fn from(value: String) -> Self {
152 Self(value)
153 }
154}
155
156impl From<Text> for String {
157 fn from(value: Text) -> String {
158 value.0
159 }
160}
161
162impl std::fmt::Display for Text {
163 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164 f.write_str(&self.0)
165 }
166}
167
168impl Text {
169 pub fn as_str(&self) -> &str {
170 self.0.as_str()
171 }
172}
173
174#[derive(Debug, thiserror::Error)]
175#[error("Could not process {original} as a {name}: {error}")]
176pub struct ValidationError {
177 original: String,
178 name: &'static str,
179 error: String
180}
181
182
183pub trait StringValidator {
184 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError>;
185}
186
187
/// A string that has been accepted (and possibly normalised) by `Validator`.
///
/// `Validator` is only a compile-time marker; no value of that type is ever stored.
///
/// The standard traits below are implemented by hand rather than derived.
/// `#[derive(...)]` adds a `Validator: Trait` bound to every generated impl, which makes
/// the traits unavailable whenever the marker type does not implement them itself, even
/// though only `value` takes part in formatting, cloning and comparison.
pub struct ValidatedString<Validator> {
    value: String,
    validator: PhantomData<Validator>,
}

impl<Validator> std::fmt::Debug for ValidatedString<Validator> {
    /// Same field layout as the derived implementation would print.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ValidatedString")
            .field("value", &self.value)
            .field("validator", &self.validator)
            .finish()
    }
}

impl<Validator> Clone for ValidatedString<Validator> {
    fn clone(&self) -> Self {
        Self {
            value: self.value.clone(),
            validator: PhantomData,
        }
    }
}

impl<Validator> PartialEq for ValidatedString<Validator> {
    /// Two values are equal when their strings are equal; the marker carries no data.
    fn eq(&self, other: &Self) -> bool {
        self.value == other.value
    }
}

impl<Validator> Eq for ValidatedString<Validator> {}

impl<Validator> PartialOrd for ValidatedString<Validator> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<Validator> Ord for ValidatedString<Validator> {
    /// Orders by the contained string.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.value.cmp(&other.value)
    }
}
193
194impl<Validator> std::fmt::Display for ValidatedString<Validator> {
195 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
196 f.write_str(&self.value)
197 }
198}
199
200impl<Validator> std::ops::Deref for ValidatedString<Validator> {
201 type Target = str;
202
203 fn deref(&self) -> &Self::Target {
204 &self.value
205 }
206}
207
208impl<Validator> Described<ElasticMeta> for ValidatedString<Validator> {
209 fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
210 String::metadata()
211 }
212}
213
214impl<Validator: StringValidator> Serialize for ValidatedString<Validator> {
215 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
216 where
217 S: serde::Serializer {
218 self.value.serialize(serializer)
219 }
220}
221
222impl<'de, Validator: StringValidator> Deserialize<'de> for ValidatedString<Validator> {
223 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
224 where
225 D: serde::Deserializer<'de> {
226 let value = String::deserialize(deserializer)?;
227 match Validator::validate(&value) {
228 Ok(value) => Ok(Self { value: value.to_string(), validator: PhantomData}),
229 Err(error) => Err(serde::de::Error::custom(error.to_string())),
230 }
231 }
232}
233
234impl<Validator: StringValidator> FromStr for ValidatedString<Validator> {
235 type Err = ValidationError;
236
237 fn from_str(s: &str) -> Result<Self, Self::Err> {
238 match check_domain(s) {
239 Ok(value) => Ok(Self { value, validator: PhantomData}),
240 Err(err) => Err(ValidationError {
241 original: s.to_owned(),
242 name: "domain",
243 error: format!("Domain rejected: {err:?}"),
244 }),
245 }
246 }
247}
248
249#[derive(Debug, thiserror::Error)]
252pub enum DomainError {
253 #[error("An empty string was provided where a domain was expected")]
254 Empty,
255 #[error("No top level domain name found")]
256 NoDot,
257 #[error("An invalid IDNA string was found")]
258 InvalidIDNA,
259 #[error("Illigal characters were found")]
260 IlligalCharacter,
261 #[error("The top level domain was rejected")]
262 InvalidTLD,
263 #[error("Input failed validation")]
264 Validation,
265}
266
267const DOMAIN_REGEX: &str = r"(?:(?:[A-Za-z0-9\u00a1-\U0010ffff][A-Za-z0-9\u00a1-\U0010ffff_-]{0,62})?[A-Za-z0-9\u00a1-\U0010ffff]\.)+(?:[Xx][Nn]--)?(?:[A-Za-z0-9\u00a1-\U0010ffff]{2,}\.?)";
268const DOMAIN_ONLY_REGEX: &str = concat!("^", DOMAIN_REGEX, "$");
/// Characters that must not appear in a label as a result of Unicode normalisation.
const DOMAIN_EXCLUDED_NORM_CHARS: &str = "./?@#";

/// Returns `true` when `item` is one of [`DOMAIN_EXCLUDED_NORM_CHARS`].
fn is_domain_excluded_char(item: char) -> bool {
    DOMAIN_EXCLUDED_NORM_CHARS
        .chars()
        .any(|excluded| excluded == item)
}
277
278pub fn check_domain(data: &str) -> Result<String, DomainError> {
279
280 if data.is_empty() {
281 return Err(DomainError::Empty)
282 }
283
284 let data = data.replace('\u{3002}', ".");
285
286 if !data.contains('.') {
287 return Err(DomainError::NoDot)
288 }
289
290 let mut normalized_parts = vec![];
291 for segment in data.split('.'){
292 if segment.is_ascii() {
293 let segment = segment.to_ascii_lowercase();
294 if segment.starts_with("xn--") {
295 let (domain, error) = idna::domain_to_unicode(&segment);
296 if error.is_err() {
297 return Err(DomainError::InvalidIDNA)
298 }
299 normalized_parts.push(domain);
300 continue
301 }
302 normalized_parts.push(segment);
303 } else {
304 let segment_norm = segment.nfkc().collect::<String>();
305 if segment != segment_norm && segment_norm.chars().any(is_domain_excluded_char) {
307 return Err(DomainError::IlligalCharacter)
308 }
311 normalized_parts.push(segment_norm);
312 }
313 }
314
315 let mut domain = normalized_parts.join(".");
316
317 static VALIDATION_REGEX: LazyLock<Regex> = LazyLock::new(||{
318 Regex::new(DOMAIN_ONLY_REGEX).expect("Error in static domain only regex")
319 });
320 if !VALIDATION_REGEX.is_match(&domain){
321 return Err(DomainError::Validation)
322 }
325 while let Some(new_domain) = domain.strip_suffix(".") {
326 domain = new_domain.to_owned();
327 }
328
329 if domain.contains("@") {
330 return Err(DomainError::IlligalCharacter)
331 }
332
333 if let Some((_, tld)) = domain.rsplit_once(".") {
334 let mut tld = tld.to_uppercase();
335 if !tld.is_ascii() {
336 tld = match domain_to_ascii(&tld) {
337 Ok(tld) => tld.to_ascii_uppercase(),
338 Err(_) => return Err(DomainError::InvalidTLD),
339 };
340 }
341
342 let domain = domain.to_uppercase();
343 let combined_tlds = find_top_level_domains();
344 if combined_tlds.contains(&tld) || TLDS_SPECIAL_BY_DOMAIN.iter().any(|d| domain.ends_with(d)) {
345 return Ok(domain.to_lowercase())
346 }
347 }
348
349 Err(DomainError::InvalidTLD)
350}
351
/// Reads extra top level domains from the `SYSTEM_LOCAL_TLD` environment variable.
///
/// The variable holds a `;`-separated list. Each entry is trimmed and empty entries are
/// dropped. An unset variable yields an empty list.
///
/// # Panics
/// Panics when the variable is set but is not valid Unicode.
fn system_local_tld() -> Vec<String> {
    let configured = std::env::var("SYSTEM_LOCAL_TLD").unwrap_or_else(|problem| match problem {
        std::env::VarError::NotPresent => String::new(),
        std::env::VarError::NotUnicode(_) => {
            panic!("SYSTEM_LOCAL_TLD contains non unicode data")
        }
    });

    configured
        .split(';')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .map(str::to_owned)
        .collect()
}
395
396fn find_top_level_domains() -> &'static HashSet<String> {
398 static TLDS: LazyLock<HashSet<String>> = LazyLock::new(|| {
399 use super::net_static::TLDS_ALPHA_BY_DOMAIN;
400 let mut combined_tlds = HashSet::<String>::new();
401 combined_tlds.extend(TLDS_ALPHA_BY_DOMAIN.iter().map(|s|s.to_string()));
402
403 for d in TLDS_SPECIAL_BY_DOMAIN {
404 if !d.contains(".") {
405 combined_tlds.insert(d.to_owned());
406 }
407 }
408
409 for tld in system_local_tld() {
410 let tld = tld.trim_matches('.').to_uppercase();
411 if !tld.is_empty() {
412 combined_tlds.insert(tld);
413 }
414 }
415
416 combined_tlds
417 });
418 &TLDS
419}
420
421pub struct DomainValidator;
422impl StringValidator for DomainValidator {
423 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
424 match check_domain(data) {
425 Ok(domain) => Ok(domain.into()),
426 Err(err) => Err(ValidationError { original: data.to_string(), name: "domain", error: err.to_string() }),
427 }
428 }
429}
430
431pub type Domain = ValidatedString<DomainValidator>;
433
/// Checks Unicode and punycode domains, including rejection of malformed ones.
#[test]
fn internationalized_domains() {
    // (input, expected normalised output)
    let accepted = [
        ("ουτοπία.δπθ.gr", "ουτοπία.δπθ.gr"),
        ("xn--kxae4bafwg.xn--pxaix.gr", "ουτοπία.δπθ.gr"),
        ("site.XN--W4RS40L", "site.嘉里"),
    ];
    for (input, expected) in accepted {
        assert_eq!(check_domain(input).unwrap(), expected);
    }

    let rejected = [
        "ουτοπία.δπθ.g",
        "ουτοπία..gr",
        "xn--kxae4bafwg.xn--pxaix.g",
        "xn--kxae4bafwg.xn--xaix.gr",
    ];
    for input in rejected {
        assert!(check_domain(input).is_err());
    }
}
444
445const URI_PATH: &str = r"([/?#]\S*)";
448const URI_REGEX: &str = concat!("((?:(?:[A-Za-z][A-Za-z0-9+.-]*:)//)(?:[^/?#\\s]+@)?(", IP_REGEX, "|", DOMAIN_REGEX, ")(?::\\d{1,5})?", URI_PATH, "?)");
449const FULL_URI: &str = concat!("^", URI_REGEX, "$");
451
452#[derive(Debug, thiserror::Error)]
453pub enum UriParseError{
454 #[error("An empty string was provided as a URI")]
455 Empty,
456 #[error("The value {0} failed to match the URI validator")]
457 Validator(String),
458 #[error("Suggested URI {0} failed domain validation {1}")]
459 Domain(String, String)
460}
461
462pub fn check_uri(value: &str) -> Result<String, UriParseError> {
463 if value.is_empty() {
464 return Err(UriParseError::Empty)
465 }
466
467 static FULL_URI_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
468 Regex::new(FULL_URI).expect("Error in uri regex")
469 });
470
471 let matches = match FULL_URI_VALIDATOR.captures(value) {
472 Some(matches) => matches,
473 None => return Err(UriParseError::Validator(value.to_owned()))
474 };
475
476 let host = match matches.get(2) {
477 Some(host) => host.as_str(),
478 None => return Err(UriParseError::Validator(value.to_owned()))
479 };
480
481 let uri = match matches.get(0) {
482 Some(uri) => uri.as_str(),
483 None => return Err(UriParseError::Validator(value.to_owned()))
484 };
485
486 match check_domain(host) {
487 Ok(domain) => Ok(uri.replace(host, &domain)),
488 Err(_) => if is_ip(host) {
489 Ok(uri.replace(host, &host.to_uppercase()))
490 } else {
491 Err(UriParseError::Domain(value.to_owned(), host.to_owned()))
492 },
493 }
494}
495
496pub struct UriValidator;
497impl StringValidator for UriValidator {
498 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
499 match check_uri(data) {
500 Ok(data) => Ok(data.into()),
501 Err(err) => Err(ValidationError { original: data.to_string(), name: "uri", error: err.to_string() }),
502 }
503 }
504}
505
506
507pub type Uri = ValidatedString<UriValidator>;
515
516const IPV4_REGEX: &str = r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
537const IPV6_REGEX: &str = concat!(
538 r"(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|",
539 r"(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|",
540 r"(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|",
541 r"(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|",
542 r":(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|",
543 r"::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|",
544 r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|",
545 r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"
546);
547const IP_REGEX: &str = concat!("(?:", IPV4_REGEX, "|", IPV6_REGEX, ")");
548const IP_ONLY_REGEX: &str = concat!("^", IP_REGEX, "$");
549pub fn is_ip(value: &str) -> bool {
553 static IP: LazyLock<Regex> = LazyLock::new(|| {
554 Regex::new(IP_ONLY_REGEX).expect("IP Regex error")
555 });
556 IP.is_match(value)
557}
558
559const PORT_REGEX: &str = r"(0|[1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])";
594const UNC_PATH_REGEX: &str = concat!(
595 r"^\\\\[a-zA-Z0-9\-_\s]{1,63}(?:\.[a-zA-Z0-9\-_\s]{1,63}){0,3}",
596 "(?:@SSL)?(?:@", PORT_REGEX, ")?",
597 r#"(?:\\[^\\\/\:\*\?"<>\|\r\n]{1,64})+\\*$"#
598);
599
600pub fn is_unc_path(value: &str) -> bool {
601 static PARSER: LazyLock<Regex> = LazyLock::new(|| {
602 Regex::new(UNC_PATH_REGEX).expect("UNC path regex error")
603 });
604 PARSER.is_match(value)
605}
606
607static URI_PATH_PARSER: LazyLock<Regex> = LazyLock::new(|| {
615 Regex::new(URI_PATH).expect("URI path regex error")
616});
617
618pub fn is_uri_path(value: &str) -> bool {
619 URI_PATH_PARSER.is_match(value)
620}
621
622const MAC_REGEX: &str = r"^(?:(?:[0-9a-f]{2}-){5}[0-9a-f]{2}|(?:[0-9a-f]{2}:){5}[0-9a-f]{2})$";
625
626static MAC_PARSER: LazyLock<Regex> = LazyLock::new(|| {
627 Regex::new(MAC_REGEX).expect("MAC regex error")
628});
629
630
631pub fn is_mac(value: &str) -> bool {
632 MAC_PARSER.is_match(value)
633}
634
635pub struct EmailValidator;
637impl StringValidator for EmailValidator {
638 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
639 match check_email(data) {
640 Ok(email) => Ok(email.into()),
641 Err(err) => Err(ValidationError { original: data.to_string(), name: "email", error: err.to_string() }),
642 }
643 }
644}
645
646pub type Email = ValidatedString<EmailValidator>;
648
649#[derive(Debug, thiserror::Error)]
650pub enum EmailError {
651 #[error("an empty string was provided where an email was expected")]
652 Empty,
653 #[error("{0} did not match email validator")]
654 Validation(String),
655 #[error("{0} is not a valid domain in an email")]
656 Domain(String),
657}
658
659const EMAIL_REGEX: &str = concat!("^[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+)*@(", DOMAIN_REGEX, ")$");
660
661static EMAIL_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
662 Regex::new(EMAIL_REGEX).expect("Error in email validator")
663});
664
665pub fn check_email(email: &str) -> Result<String, EmailError> {
666 if email.is_empty() {
667 return Err(EmailError::Empty)
668 }
669
670 let matches = match EMAIL_VALIDATOR.captures(email) {
671 Some(matches) => matches,
672 None => return Err(EmailError::Validation(email.to_owned())),
673 };
674
675 match matches.get(1) {
676 Some(domain) => if check_domain(domain.as_str()).is_ok() {
677 Ok(email.to_lowercase())
678 } else {
679 Err(EmailError::Domain(domain.as_str().to_owned()))
680 },
681 None => Err(EmailError::Validation(email.to_owned()))
682 }
683}
684
685const PHONE_REGEX: &str = r"^(\+?\d{1,2})?[ .-]?(\(\d{3}\)|\d{3})[ .-](\d{3})[ .-](\d{4})$";
688
689static PHONE_PARSER: LazyLock<Regex> = LazyLock::new(|| {
690 Regex::new(PHONE_REGEX).expect("Phone regex error")
691});
692
693
694pub fn is_phone_number(value: &str) -> bool {
695 PHONE_PARSER.is_match(value)
696}