1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::marker::PhantomData;
4use std::str::FromStr;
5use std::sync::LazyLock;
6
7use idna::domain_to_ascii;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use serde_with::{DeserializeFromStr, SerializeDisplay};
11use struct_metadata::Described;
12use unicode_normalization::UnicodeNormalization;
13use constcat::concat;
14
15use crate::types::net_static::TLDS_SPECIAL_BY_DOMAIN;
16use crate::{ElasticMeta, ModelError};
17
18
19#[derive(Debug, Copy, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
23pub struct ServiceName(internment::Intern<String>);
24
25impl std::fmt::Display for ServiceName {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 self.0.fmt(f)
28 }
29}
30
31impl std::ops::Deref for ServiceName {
32 type Target = str;
33
34 fn deref(&self) -> &Self::Target {
35 &self.0
36 }
37}
38
39
40impl<'de> Deserialize<'de> for ServiceName {
41 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
42 where
43 D: serde::Deserializer<'de>
44 {
45 struct Visitor {}
46
47 impl<'de> serde::de::Visitor<'de> for Visitor {
48 type Value = ServiceName;
49
50 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
51 write!(formatter, "expected a string holding a service name or catagory")
52 }
53
54 fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
55 where
56 E: serde::de::Error,
57 {
58 Ok(ServiceName(internment::Intern::from_ref(s)))
59 }
60 }
61 deserializer.deserialize_str(Visitor{})
62 }
63}
64
65impl Described<ElasticMeta> for ServiceName {
66 fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
67 String::metadata()
68 }
69}
70
71impl ServiceName {
72 pub fn from_string(value: String) -> Self {
73 Self(internment::Intern::new(value))
74 }
75}
76
77impl From<&str> for ServiceName {
78 fn from(value: &str) -> Self {
79 Self(internment::Intern::from_ref(value))
80 }
81}
82
83#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
97#[metadata_type(ElasticMeta)]
98#[metadata(mapping="keyword")]
99pub struct Keyword(String);
100
101impl std::fmt::Display for Keyword {
102 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103 f.write_str(&self.0)
104 }
105}
106
107impl std::ops::Deref for Keyword {
108 type Target = str;
109
110 fn deref(&self) -> &Self::Target {
111 &self.0
112 }
113}
114
115impl From<String> for Keyword {
116 fn from(s: String) -> Self {
117 Keyword(s)
118 }
119}
120
121impl From<&str> for Keyword {
122 fn from(s: &str) -> Self {
123 Keyword(s.to_string())
124 }
125}
126
127
128#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
129#[metadata_type(ElasticMeta)]
130#[metadata(mapping="wildcard")]
131pub struct Wildcard(String);
132
133impl std::fmt::Display for Wildcard {
134 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135 f.write_str(&self.0)
136 }
137}
138
139impl std::ops::Deref for Wildcard {
140 type Target = str;
141
142 fn deref(&self) -> &Self::Target {
143 &self.0
144 }
145}
146
147impl From<String> for Wildcard {
148 fn from(s: String) -> Self {
149 Wildcard(s)
150 }
151}
152
153impl From<&str> for Wildcard {
154 fn from(s: &str) -> Self {
155 Wildcard(s.to_string())
156 }
157}
158
159
160#[derive(Debug, SerializeDisplay, DeserializeFromStr, Described, Clone, PartialEq, Eq, PartialOrd, Ord)]
162#[metadata_type(ElasticMeta)]
163pub struct UpperString(String);
164
165
166impl std::fmt::Display for UpperString {
167 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168 f.write_str(&self.0)
169 }
170}
171
172impl std::ops::Deref for UpperString {
173 type Target = str;
174
175 fn deref(&self) -> &Self::Target {
176 &self.0
177 }
178}
179
180impl std::str::FromStr for UpperString {
181 type Err = ModelError;
182
183 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
184 let value = s.trim().to_uppercase();
185 Ok(UpperString(value))
186 }
187}
188
189impl From<&str> for UpperString {
190 fn from(s: &str) -> Self {
191 let value = s.trim().to_uppercase();
192 UpperString(value)
193 }
194}
195
196impl PartialEq<&str> for UpperString {
197 fn eq(&self, other: &&str) -> bool {
198 self.0.eq(other)
199 }
200}
201
202
203#[derive(Serialize, Deserialize, Described, PartialEq, Eq, Debug, Clone, Default)]
204#[metadata_type(ElasticMeta)]
205#[metadata(mapping="text")]
206pub struct Text(pub String);
207
208impl From<&str> for Text {
209 fn from(value: &str) -> Self {
210 Self(value.to_owned())
211 }
212}
213
214impl From<String> for Text {
215 fn from(value: String) -> Self {
216 Self(value)
217 }
218}
219
220impl From<Text> for String {
221 fn from(value: Text) -> String {
222 value.0
223 }
224}
225
226impl std::fmt::Display for Text {
227 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228 f.write_str(&self.0)
229 }
230}
231
232impl Text {
233 pub fn as_str(&self) -> &str {
234 self.0.as_str()
235 }
236}
237
238#[derive(Debug, thiserror::Error)]
239#[error("Could not process {original} as a {name}: {error}")]
240pub struct ValidationError {
241 original: String,
242 name: &'static str,
243 error: String
244}
245
246
247pub trait StringValidator {
248 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError>;
249}
250
251
/// A string tagged with the validator type it is meant to satisfy.
///
/// The derived traits below are only available when `Validator` itself
/// implements them, because `derive` adds a bound on every type parameter.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ValidatedString<Validator> {
    /// The stored string.
    value: String,
    /// Zero-sized marker tying the value to its validator type.
    validator: PhantomData<Validator>,
}
257
258impl<Validator> std::fmt::Display for ValidatedString<Validator> {
259 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
260 f.write_str(&self.value)
261 }
262}
263
264impl<Validator> std::ops::Deref for ValidatedString<Validator> {
265 type Target = str;
266
267 fn deref(&self) -> &Self::Target {
268 &self.value
269 }
270}
271
272impl<Validator> Described<ElasticMeta> for ValidatedString<Validator> {
273 fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
274 String::metadata()
275 }
276}
277
278impl<Validator: StringValidator> Serialize for ValidatedString<Validator> {
279 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
280 where
281 S: serde::Serializer {
282 self.value.serialize(serializer)
283 }
284}
285
286impl<'de, Validator: StringValidator> Deserialize<'de> for ValidatedString<Validator> {
287 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
288 where
289 D: serde::Deserializer<'de> {
290 let value = String::deserialize(deserializer)?;
291 match Validator::validate(&value) {
292 Ok(value) => Ok(Self { value: value.to_string(), validator: PhantomData}),
293 Err(error) => Err(serde::de::Error::custom(error.to_string())),
294 }
295 }
296}
297
298impl<Validator: StringValidator> FromStr for ValidatedString<Validator> {
299 type Err = ValidationError;
300
301 fn from_str(s: &str) -> Result<Self, Self::Err> {
302 match check_domain(s) {
303 Ok(value) => Ok(Self { value, validator: PhantomData}),
304 Err(err) => Err(ValidationError {
305 original: s.to_owned(),
306 name: "domain",
307 error: format!("Domain rejected: {err:?}"),
308 }),
309 }
310 }
311}
312
313#[derive(Debug, thiserror::Error)]
316pub enum DomainError {
317 #[error("An empty string was provided where a domain was expected")]
318 Empty,
319 #[error("No top level domain name found")]
320 NoDot,
321 #[error("An invalid IDNA string was found")]
322 InvalidIDNA,
323 #[error("Illigal characters were found")]
324 IlligalCharacter,
325 #[error("The top level domain was rejected")]
326 InvalidTLD,
327 #[error("Input failed validation")]
328 Validation,
329}
330
331const DOMAIN_REGEX: &str = r"(?:(?:[A-Za-z0-9\u00a1-\U0010ffff][A-Za-z0-9\u00a1-\U0010ffff_-]{0,62})?[A-Za-z0-9\u00a1-\U0010ffff]\.)+(?:[Xx][Nn]--)?(?:[A-Za-z0-9\u00a1-\U0010ffff]{2,}\.?)";
332const DOMAIN_ONLY_REGEX: &str = concat!("^", DOMAIN_REGEX, "$");
333const DOMAIN_EXCLUDED_NORM_CHARS: &str = "./?@#";
334
335
336fn is_domain_excluded_char(item: char) -> bool {
337 DOMAIN_EXCLUDED_NORM_CHARS.contains(item)
338 }
341
342pub fn check_domain(data: &str) -> Result<String, DomainError> {
343
344 if data.is_empty() {
345 return Err(DomainError::Empty)
346 }
347
348 let data = data.replace('\u{3002}', ".");
349
350 if !data.contains('.') {
351 return Err(DomainError::NoDot)
352 }
353
354 let mut normalized_parts = vec![];
355 for segment in data.split('.'){
356 if segment.is_ascii() {
357 let segment = segment.to_ascii_lowercase();
358 if segment.starts_with("xn--") {
359 let (domain, error) = idna::domain_to_unicode(&segment);
360 if error.is_err() {
361 return Err(DomainError::InvalidIDNA)
362 }
363 normalized_parts.push(domain);
364 continue
365 }
366 normalized_parts.push(segment);
367 } else {
368 let segment_norm = segment.nfkc().collect::<String>();
369 if segment != segment_norm && segment_norm.chars().any(is_domain_excluded_char) {
371 return Err(DomainError::IlligalCharacter)
372 }
375 normalized_parts.push(segment_norm);
376 }
377 }
378
379 let mut domain = normalized_parts.join(".");
380
381 static VALIDATION_REGEX: LazyLock<Regex> = LazyLock::new(||{
382 Regex::new(DOMAIN_ONLY_REGEX).expect("Error in static domain only regex")
383 });
384 if !VALIDATION_REGEX.is_match(&domain){
385 return Err(DomainError::Validation)
386 }
389 while let Some(new_domain) = domain.strip_suffix(".") {
390 domain = new_domain.to_owned();
391 }
392
393 if domain.contains("@") {
394 return Err(DomainError::IlligalCharacter)
395 }
396
397 if let Some((_, tld)) = domain.rsplit_once(".") {
398 let mut tld = tld.to_uppercase();
399 if !tld.is_ascii() {
400 tld = match domain_to_ascii(&tld) {
401 Ok(tld) => tld.to_ascii_uppercase(),
402 Err(_) => return Err(DomainError::InvalidTLD),
403 };
404 }
405
406 let domain = domain.to_uppercase();
407 let combined_tlds = find_top_level_domains();
408 if combined_tlds.contains(&tld) || TLDS_SPECIAL_BY_DOMAIN.iter().any(|d| domain.ends_with(d)) {
409 return Ok(domain.to_lowercase())
410 }
411 }
412
413 Err(DomainError::InvalidTLD)
414}
415
/// Reads extra top level domains from the `SYSTEM_LOCAL_TLD` environment
/// variable.
///
/// The value is split on `;`. Each entry is trimmed and empty entries are
/// dropped. An unset variable yields an empty list.
///
/// # Panics
///
/// Panics if the variable is set but is not valid Unicode.
fn system_local_tld() -> Vec<String> {
    let raw = match std::env::var("SYSTEM_LOCAL_TLD") {
        Ok(configured) => configured,
        Err(std::env::VarError::NotPresent) => return Vec::new(),
        Err(std::env::VarError::NotUnicode(_)) => {
            panic!("SYSTEM_LOCAL_TLD contains non unicode data")
        }
    };

    raw.split(";")
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .map(str::to_owned)
        .collect()
}
459
460fn find_top_level_domains() -> &'static HashSet<String> {
462 static TLDS: LazyLock<HashSet<String>> = LazyLock::new(|| {
463 use super::net_static::TLDS_ALPHA_BY_DOMAIN;
464 let mut combined_tlds = HashSet::<String>::new();
465 combined_tlds.extend(TLDS_ALPHA_BY_DOMAIN.iter().map(|s|s.to_string()));
466
467 for d in TLDS_SPECIAL_BY_DOMAIN {
468 if !d.contains(".") {
469 combined_tlds.insert(d.to_owned());
470 }
471 }
472
473 for tld in system_local_tld() {
474 let tld = tld.trim_matches('.').to_uppercase();
475 if !tld.is_empty() {
476 combined_tlds.insert(tld);
477 }
478 }
479
480 combined_tlds
481 });
482 &TLDS
483}
484
/// Marker type selecting domain validation for a `ValidatedString`.
///
/// The derives matter: `ValidatedString<V>` derives `Debug`, `Clone`, the
/// comparison traits and so on, and each of those derives requires `V` to
/// implement the same trait.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DomainValidator;
486impl StringValidator for DomainValidator {
487 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
488 match check_domain(data) {
489 Ok(domain) => Ok(domain.into()),
490 Err(err) => Err(ValidationError { original: data.to_string(), name: "domain", error: err.to_string() }),
491 }
492 }
493}
494
495pub type Domain = ValidatedString<DomainValidator>;
497
/// Exercises `check_domain` with Unicode and punycode inputs.
#[test]
fn internationalized_domains() {
    // Accepted inputs, paired with the normalized form they must produce.
    let accepted = [
        ("ουτοπία.δπθ.gr", "ουτοπία.δπθ.gr"),
        ("xn--kxae4bafwg.xn--pxaix.gr", "ουτοπία.δπθ.gr"),
        ("site.XN--W4RS40L", "site.嘉里"),
    ];
    for (input, expected) in accepted {
        assert_eq!(check_domain(input).unwrap(), expected);
    }

    // Inputs that must be rejected.
    let rejected = [
        "ουτοπία.δπθ.g",
        "ουτοπία..gr",
        "xn--kxae4bafwg.xn--pxaix.g",
        "xn--kxae4bafwg.xn--xaix.gr",
    ];
    for input in rejected {
        assert!(check_domain(input).is_err());
    }
}
508
509const URI_PATH: &str = r"([/?#]\S*)";
512const URI_REGEX: &str = concat!("((?:(?:[A-Za-z][A-Za-z0-9+.-]*:)//)(?:[^/?#\\s]+@)?(", IP_REGEX, "|", DOMAIN_REGEX, ")(?::\\d{1,5})?", URI_PATH, "?)");
513const FULL_URI: &str = concat!("^", URI_REGEX, "$");
515
516#[derive(Debug, thiserror::Error)]
517pub enum UriParseError{
518 #[error("An empty string was provided as a URI")]
519 Empty,
520 #[error("The value {0} failed to match the URI validator")]
521 Validator(String),
522 #[error("Suggested URI {0} failed domain validation {1}")]
523 Domain(String, String)
524}
525
526pub fn check_uri(value: &str) -> Result<String, UriParseError> {
527 if value.is_empty() {
528 return Err(UriParseError::Empty)
529 }
530
531 static FULL_URI_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
532 Regex::new(FULL_URI).expect("Error in uri regex")
533 });
534
535 let matches = match FULL_URI_VALIDATOR.captures(value) {
536 Some(matches) => matches,
537 None => return Err(UriParseError::Validator(value.to_owned()))
538 };
539
540 let host = match matches.get(2) {
541 Some(host) => host.as_str(),
542 None => return Err(UriParseError::Validator(value.to_owned()))
543 };
544
545 let uri = match matches.get(0) {
546 Some(uri) => uri.as_str(),
547 None => return Err(UriParseError::Validator(value.to_owned()))
548 };
549
550 match check_domain(host) {
551 Ok(domain) => Ok(uri.replace(host, &domain)),
552 Err(_) => if is_ip(host) {
553 Ok(uri.replace(host, &host.to_uppercase()))
554 } else {
555 Err(UriParseError::Domain(value.to_owned(), host.to_owned()))
556 },
557 }
558}
559
/// Marker type selecting URI validation for a `ValidatedString`.
///
/// The derives matter: `ValidatedString<V>` derives `Debug`, `Clone`, the
/// comparison traits and so on, and each of those derives requires `V` to
/// implement the same trait.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriValidator;
561impl StringValidator for UriValidator {
562 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
563 match check_uri(data) {
564 Ok(data) => Ok(data.into()),
565 Err(err) => Err(ValidationError { original: data.to_string(), name: "uri", error: err.to_string() }),
566 }
567 }
568}
569
570
571pub type Uri = ValidatedString<UriValidator>;
579
580const IPV4_REGEX: &str = r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
601const IPV6_REGEX: &str = concat!(
602 r"(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|",
603 r"(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|",
604 r"(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|",
605 r"(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|",
606 r":(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|",
607 r"::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|",
608 r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|",
609 r"(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"
610);
611const IP_REGEX: &str = concat!("(?:", IPV4_REGEX, "|", IPV6_REGEX, ")");
612const IP_ONLY_REGEX: &str = concat!("^", IP_REGEX, "$");
613pub fn is_ip(value: &str) -> bool {
617 static IP: LazyLock<Regex> = LazyLock::new(|| {
618 Regex::new(IP_ONLY_REGEX).expect("IP Regex error")
619 });
620 IP.is_match(value)
621}
622
623const PORT_REGEX: &str = r"(0|[1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])";
658const UNC_PATH_REGEX: &str = concat!(
659 r"^\\\\[a-zA-Z0-9\-_\s]{1,63}(?:\.[a-zA-Z0-9\-_\s]{1,63}){0,3}",
660 "(?:@SSL)?(?:@", PORT_REGEX, ")?",
661 r#"(?:\\[^\\\/\:\*\?"<>\|\r\n]{1,64})+\\*$"#
662);
663
664pub fn is_unc_path(value: &str) -> bool {
665 static PARSER: LazyLock<Regex> = LazyLock::new(|| {
666 Regex::new(UNC_PATH_REGEX).expect("UNC path regex error")
667 });
668 PARSER.is_match(value)
669}
670
671static URI_PATH_PARSER: LazyLock<Regex> = LazyLock::new(|| {
679 Regex::new(URI_PATH).expect("URI path regex error")
680});
681
682pub fn is_uri_path(value: &str) -> bool {
683 URI_PATH_PARSER.is_match(value)
684}
685
686const MAC_REGEX: &str = r"^(?:(?:[0-9a-f]{2}-){5}[0-9a-f]{2}|(?:[0-9a-f]{2}:){5}[0-9a-f]{2})$";
689
690static MAC_PARSER: LazyLock<Regex> = LazyLock::new(|| {
691 Regex::new(MAC_REGEX).expect("MAC regex error")
692});
693
694
695pub fn is_mac(value: &str) -> bool {
696 MAC_PARSER.is_match(value)
697}
698
/// Marker type selecting email validation for a `ValidatedString`.
///
/// The derives matter: `ValidatedString<V>` derives `Debug`, `Clone`, the
/// comparison traits and so on, and each of those derives requires `V` to
/// implement the same trait.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EmailValidator;
701impl StringValidator for EmailValidator {
702 fn validate<'a>(data: &'a str) -> Result<Cow<'a, str>, ValidationError> {
703 match check_email(data) {
704 Ok(email) => Ok(email.into()),
705 Err(err) => Err(ValidationError { original: data.to_string(), name: "email", error: err.to_string() }),
706 }
707 }
708}
709
710pub type Email = ValidatedString<EmailValidator>;
712
713#[derive(Debug, thiserror::Error)]
714pub enum EmailError {
715 #[error("an empty string was provided where an email was expected")]
716 Empty,
717 #[error("{0} did not match email validator")]
718 Validation(String),
719 #[error("{0} is not a valid domain in an email")]
720 Domain(String),
721}
722
723const EMAIL_REGEX: &str = concat!("^[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_‘{|}~-]+)*@(", DOMAIN_REGEX, ")$");
724
725static EMAIL_VALIDATOR: LazyLock<Regex> = LazyLock::new(|| {
726 Regex::new(EMAIL_REGEX).expect("Error in email validator")
727});
728
729pub fn check_email(email: &str) -> Result<String, EmailError> {
730 if email.is_empty() {
731 return Err(EmailError::Empty)
732 }
733
734 let matches = match EMAIL_VALIDATOR.captures(email) {
735 Some(matches) => matches,
736 None => return Err(EmailError::Validation(email.to_owned())),
737 };
738
739 match matches.get(1) {
740 Some(domain) => if check_domain(domain.as_str()).is_ok() {
741 Ok(email.to_lowercase())
742 } else {
743 Err(EmailError::Domain(domain.as_str().to_owned()))
744 },
745 None => Err(EmailError::Validation(email.to_owned()))
746 }
747}
748
749const PHONE_REGEX: &str = r"^(\+?\d{1,2})?[ .-]?(\(\d{3}\)|\d{3})[ .-](\d{3})[ .-](\d{4})$";
752
753static PHONE_PARSER: LazyLock<Regex> = LazyLock::new(|| {
754 Regex::new(PHONE_REGEX).expect("Phone regex error")
755});
756
757
758pub fn is_phone_number(value: &str) -> bool {
759 PHONE_PARSER.is_match(value)
760}