1use regex::Regex;
2use std::sync::LazyLock;
3
4pub static DOI_REGEX: LazyLock<Regex> =
5 LazyLock::new(|| Regex::new(r"^10\.[0-9]{4,9}/.{1,200}$").unwrap());
6pub static RESOURCE_URL_REGEX: LazyLock<Regex> =
7 LazyLock::new(|| Regex::new(r"^(?i)(https?|ftp)://.*$").unwrap());
8pub static ORCID_REGEX: LazyLock<Regex> = LazyLock::new(|| {
9 Regex::new(r"^https?://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[X0-9]{1}$").unwrap()
10});
11pub static INSTITUTION_PID_REGEX: LazyLock<Regex> =
12 LazyLock::new(|| Regex::new(r"[hH][tT][tT][pP][sS]://.{1,50}$").unwrap());
13pub static NAME_REGEX: LazyLock<Regex> =
14 LazyLock::new(|| Regex::new(r"^[^\d\?\s][^\d\?]*[^\?\s]+[^\d]*").unwrap());
15
#[cfg(test)]
mod unit {
    use super::*;

    /// Tests for `DOI_REGEX`.
    mod doi {
        use super::*;

        #[test]
        fn valid_dois() {
            let accepted: &[&str] = &[
                "10.1000/xyz123",
                "10.1234/abcdef",
                "10.12345/some-article",
                "10.123456789/a",
                "10.1016/j.cell.2020.01.001",
                "10.1038/nature12373",
                "10.1109/5.771073",
                "10.1002/(SICI)1097-4636(199905)45:2<133::AID-JBM9>3.0.CO;2-T",
            ];
            for &candidate in accepted {
                assert!(DOI_REGEX.is_match(candidate), "should match: {}", candidate);
            }
        }

        #[test]
        fn invalid_dois() {
            let rejected: &[&str] = &[
                // Does not start with `10.`.
                "11.1000/xyz123",
                // Registrant code has only three digits.
                "10.123/xyz",
                // Empty suffix.
                "10.1234/",
                // Extra text before the `10.` prefix.
                "doi:10.1234/abc",
                // Registrant code has ten digits.
                "10.1234567890/abc",
            ];
            for &candidate in rejected {
                assert!(
                    !DOI_REGEX.is_match(candidate),
                    "should not match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn suffix_length_limit() {
            // A 200-character suffix is the longest one accepted.
            let longest_allowed = format!("10.1234/{}", "a".repeat(200));
            let one_too_long = format!("10.1234/{}", "a".repeat(201));

            assert!(DOI_REGEX.is_match(&longest_allowed));
            assert!(!DOI_REGEX.is_match(&one_too_long));
        }
    }

    /// Tests for `RESOURCE_URL_REGEX`.
    mod resource_url {
        use super::*;

        #[test]
        fn valid_urls() {
            let accepted: &[&str] = &[
                "http://example.com",
                "https://example.com",
                "HTTP://EXAMPLE.COM",
                "HTTPS://example.com/path",
                "ftp://files.example.com",
                "FTP://files.example.com",
                "https://example.com/path/to/resource?query=1&other=2",
                "http://localhost:8080",
                "https://sub.domain.example.com",
            ];
            for &candidate in accepted {
                assert!(
                    RESOURCE_URL_REGEX.is_match(candidate),
                    "should match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn invalid_urls() {
            let rejected: &[&str] = &[
                // No scheme at all.
                "example.com",
                // Schemes other than http, https and ftp.
                "mailto:test@example.com",
                "file:///path/to/file",
                // Scheme-relative reference.
                "//example.com",
                // Scheme that merely starts with `http`.
                "httpx://example.com",
            ];
            for &candidate in rejected {
                assert!(
                    !RESOURCE_URL_REGEX.is_match(candidate),
                    "should not match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn rejects_non_start_anchored() {
            // The URL must begin at the very start of the input.
            let embedded = "visit http://example.com";
            assert!(!RESOURCE_URL_REGEX.is_match(embedded));
        }
    }

    /// Tests for `ORCID_REGEX`.
    mod orcid {
        use super::*;

        #[test]
        fn valid_orcids() {
            let accepted: &[&str] = &[
                "https://orcid.org/0000-0002-1825-0097",
                "http://orcid.org/0000-0002-1825-0097",
                "https://orcid.org/0000-0001-5109-3700",
                "https://orcid.org/0000-0002-1694-233X",
                "http://orcid.org/1234-5678-9012-345X",
            ];
            for &candidate in accepted {
                assert!(
                    ORCID_REGEX.is_match(candidate),
                    "should match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn invalid_orcids() {
            let rejected: &[&str] = &[
                // Missing scheme.
                "orcid.org/0000-0002-1825-0097",
                // Last group is one character short.
                "https://orcid.org/0000-0002-1825-009",
                // Last group is one character too long.
                "https://orcid.org/0000-0002-1825-00977",
                // First group is one digit short.
                "https://orcid.org/000-0002-1825-0097",
                // Final character is neither a digit nor `X`.
                "https://orcid.org/0000-0002-1825-009Y",
                // Wrong host.
                "https://notorcid.org/0000-0002-1825-0097",
                // Wrong scheme.
                "ftp://orcid.org/0000-0002-1825-0097",
            ];
            for &candidate in rejected {
                assert!(
                    !ORCID_REGEX.is_match(candidate),
                    "should not match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn checksum_x_only_in_last_position() {
            let x_last = "https://orcid.org/0000-0002-1825-009X";
            let x_second_to_last = "https://orcid.org/0000-0002-1825-00X9";

            assert!(ORCID_REGEX.is_match(x_last));
            assert!(!ORCID_REGEX.is_match(x_second_to_last));
        }
    }

    /// Tests for `INSTITUTION_PID_REGEX`.
    mod institution_pid {
        use super::*;

        #[test]
        fn valid_institution_pids() {
            let accepted: &[&str] = &[
                "https://example.com",
                "HTTPS://ror.org/12345",
                "HtTpS://isni.org/isni/0000000121032683",
                // A single character after the scheme is enough.
                "https://a",
            ];
            for &candidate in accepted {
                assert!(
                    INSTITUTION_PID_REGEX.is_match(candidate),
                    "should match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn invalid_institution_pids() {
            let rejected: &[&str] = &[
                // Only `https` is allowed as the scheme.
                "http://example.com",
                "ftp://example.com",
                // Nothing after the scheme separator.
                "https://",
            ];
            for &candidate in rejected {
                assert!(
                    !INSTITUTION_PID_REGEX.is_match(candidate),
                    "should not match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn max_length_after_protocol() {
            // Up to 50 characters may follow `https://`.
            let longest_allowed = format!("https://{}", "a".repeat(50));
            let one_too_long = format!("https://{}", "a".repeat(51));

            assert!(INSTITUTION_PID_REGEX.is_match(&longest_allowed));
            assert!(!INSTITUTION_PID_REGEX.is_match(&one_too_long));
        }
    }

    /// Tests for `NAME_REGEX`.
    mod name {
        use super::*;

        #[test]
        fn valid_names() {
            let accepted: &[&str] = &[
                "John Smith",
                "María García",
                "Jean-Pierre",
                "O'Connor",
                "李明",
                "Müller",
                "Anne-Marie O'Brien",
            ];
            for &candidate in accepted {
                assert!(NAME_REGEX.is_match(candidate), "should match: {}", candidate);
            }
        }

        #[test]
        fn invalid_names() {
            // NOTE(review): the last two entries are identical as found in the
            // file; one was presumably meant to be a different whitespace-only
            // string — confirm against the original source.
            let rejected: &[&str] = &["123", "?", "???", " ", " "];
            for &candidate in rejected {
                assert!(
                    !NAME_REGEX.is_match(candidate),
                    "should not match: {}",
                    candidate
                );
            }
        }

        #[test]
        fn names_with_some_digits_at_edges() {
            let with_suffix = "John Smith III";
            let with_inner_digit = "John 3rd Smith";

            assert!(NAME_REGEX.is_match(with_suffix));
            assert!(NAME_REGEX.is_match(with_inner_digit));
        }

        #[test]
        fn rejects_digit_only_or_question_mark_only() {
            let digits_only = "12345";
            let question_marks_only = "?????";

            assert!(!NAME_REGEX.is_match(digits_only));
            assert!(!NAME_REGEX.is_match(question_marks_only));
        }
    }
}