// Building blocks for regular expressions.
//
// The range-style values (`a-z`, `0-9`, ...) carry no surrounding brackets,
// so they only make sense once placed inside a character class.

/// ASCII letters (both cases) and digits, as a range expression.
pub const ALPHA_NUMERIC: &str = "a-zA-Z0-9";
/// ASCII letters in both cases, as a range expression.
pub const UPPER_LOWER_CASE: &str = "a-zA-Z";
/// ASCII lowercase letters, as a range expression.
pub const LOWER_CASE: &str = "a-z";
/// ASCII uppercase letters, as a range expression.
pub const UPPER_CASE: &str = "A-Z";
/// ASCII decimal digits, as a range expression.
pub const DIGITS: &str = "0-9";
/// The regex wildcard metacharacter `.`.
pub const ANY: &str = ".";

// The next three hold the literal control character itself (one character
// long), not a regex escape sequence.

/// The NUL character (U+0000).
pub const NULL_CHAR: &str = "\0";
/// The line-feed character (U+000A).
pub const NEW_LINE: &str = "\n";
/// The horizontal-tab character (U+0009).
pub const TAB: &str = "\t";

// The remaining three hold regex escape *text* (a backslash followed by a
// letter), which is left for the regex engine to interpret.

/// The two-character escape `\f`.
pub const FORM_FEED: &str = r"\f";
/// The two-character escape `\v`.
pub const VERTICAL_TAB: &str = r"\v";
/// The escape `\b` wrapped in a character class.
///
/// NOTE(review): presumably meant as the backspace character (as opposed to
/// the word boundary `\b`); confirm the regex engine in use accepts `\b`
/// inside a class.
pub const BACKSPACE: &str = r"[\b]";
/// Pattern for an e-mail address with two capture groups: the part before
/// `@` and the domain after it.
///
/// Only lowercase ASCII letters are listed in the classes, and the first
/// group uses `*`, so an empty local part (e.g. `@example.com`) is accepted.
pub const EMAIL: &str = concat!(
    // Group 1: local part (may be empty).
    r"([a-z0-9_+.]*)",
    r"@",
    // Group 2: labels separated by a single `-` or `.`, then a 2-6 letter suffix.
    r"([a-z0-9]+(?:[\-\.]{1}[a-z0-9]+)*\.[a-z]{2,6})",
);

/// Pattern for a website URL, split into named capture groups:
/// `protocol`, `subdomain`, `domain_name`, `top_level_name`, `directory`
/// and `query_params`. Everything except `domain_name` and
/// `top_level_name` is optional. The pattern enables the multi-line flag
/// (`(?m)`).
pub const WEBSITE_URL: &str = concat!(
    r"(?m)",
    // Optional `ftp://`, `http://` or `https://`, and optional `www.`.
    r"(?:(?:(?P<protocol>ftp|https?)://)?(?:(?P<subdomain>www)\.)?)?",
    // 2 to 253 letters, digits or hyphens.
    r"(?P<domain_name>[-a-zA-Z0-9]{2,253})",
    // One or more `.xx` suffixes of 2-6 lowercase letters (e.g. `.co.uk`).
    r"(?P<top_level_name>(?:\.[a-z]{2,6})+)",
    // Optional path made of lowercase-alphanumeric segments.
    r"(?P<directory>(?:/[a-z0-9]+)+)?",
    // Optional `?` followed by the query string.
    r"(?:\?(?P<query_params>[-a-zA-Z0-9@:%_\+~#()&//=]*))?",
);
/// Persian letters as a list of `\uXXXX` code points and ranges, followed by
/// the text `|\p{arabic}`.
///
/// NOTE(review): if this value is placed inside a character class, the `|`
/// is a literal pipe there rather than an alternation; confirm the trailing
/// `|\p{arabic}` is intended. `PERSIAN_ALPHA_NUMERIC` does not carry it.
pub const PERSIAN_ALPHABET: &str = concat!(
    // Letter ranges.
    r"\u0621-\u0628\u062A-\u063A\u0641-\u0642\u0644-\u0648",
    // Combining marks U+064E-U+0651 and U+0655.
    r"\u064E-\u0651\u0655",
    // Individually listed letters (U+067E, U+0686, U+0698, U+06A9, U+06AF,
    // U+06BE, U+06CC).
    r"\u067E\u0686\u0698\u06A9\u06AF\u06BE\u06CC",
    r"|\p{arabic}",
);

/// Digits U+06F0-U+06F9 and U+0660-U+0669, as range expressions.
pub const PERSIAN_ARABIC_NUM: &str = r"\u06F0-\u06F9\u0660-\u0669";

/// The letter and mark code points of `PERSIAN_ALPHABET` (without its
/// `|\p{arabic}` tail) followed by the digit ranges of `PERSIAN_ARABIC_NUM`.
pub const PERSIAN_ALPHA_NUMERIC: &str = concat!(
    // Letters and marks.
    r"\u0621-\u0628\u062A-\u063A\u0641-\u0642\u0644-\u0648",
    r"\u064E-\u0651\u0655",
    r"\u067E\u0686\u0698\u06A9\u06AF\u06BE\u06CC",
    // Digits.
    r"\u06F0-\u06F9\u0660-\u0669",
);

/// Punctuation code points U+060C, U+061B, U+061F, U+0640, U+066A, U+066B
/// and U+066C.
pub const PERSIAN_PUNCTUATION: &str = r"\u060C\u061B\u061F\u0640\u066A\u066B\u066C";

/// The ASCII space plus the ranges U+2000-U+200F and U+2028-U+202F.
pub const PERSIAN_SPACES: &str = r"\u0020\u2000-\u200F\u2028-\u202F";

/// ASCII letters plus the contiguous range U+00C0-U+017F.
///
/// The range is taken wholesale, so it also contains the non-letter code
/// points inside it (for example U+00D7 and U+00F7).
pub const FRENCH_ALPHABET: &str = r"a-zA-Z\u00C0-\u017F";

/// ASCII letters plus the German umlauts and sharp s.
pub const GERMAN_ALPHABET: &str = concat!(
    r"a-zA-Z",
    // ä ö ü
    r"\u00E4\u00F6\u00FC",
    // Ä Ö Ü
    r"\u00C4\u00D6\u00DC",
    // ß
    r"\u00df",
);

/// The code point range U+4E00-U+9FA5.
pub const CHINESE_ALPHABET: &str = r"\u4e00-\u9fa5";
// Time-of-day patterns. Every pattern is anchored with `\b` on both sides
// and accepts one- or two-digit minutes/seconds (`[0-5]?\d`), so `7:4` is
// matched as well as `07:04`.

/// 12-hour `H:MM` time with an optional `am`/`pm` suffix.
///
/// Groups: 1 = hour (1-12, optional leading zero), 2 = minute,
/// 3 = the suffix (case-insensitive, optionally preceded by one space).
pub const TIME_HH_MM_12_AMPM: &str = concat!(
    r"\b(1[0-2]|0?[1-9])",
    r":([0-5]?\d)",
    r"(?: ?((?i)[ap]m))?\b",
);

/// 12-hour `H:MM:SS` time with an optional `am`/`pm` suffix.
///
/// Groups: 1 = hour, 2 = minute, 3 = second, 4 = the suffix.
pub const TIME_HH_MM_SS_12_AMPM: &str = concat!(
    r"\b(1[0-2]|0?[1-9])",
    r":([0-5]?\d)",
    r":([0-5]?\d)",
    r"(?: ?((?i)[ap]m))?\b",
);

/// 24-hour `H:MM` time. Groups: 1 = hour (0-23), 2 = minute.
pub const TIME_HH_MM_24: &str = concat!(
    r"\b([01]?\d|2[0-3])",
    r":([0-5]?\d)",
    r"\b",
);

/// 24-hour `H:MM:SS` time. Groups: 1 = hour (0-23), 2 = minute, 3 = second.
pub const TIME_HH_MM_SS_24: &str = concat!(
    r"\b([01]?\d|2[0-3])",
    r":([0-5]?\d)",
    r":([0-5]?\d)",
    r"\b",
);
/// Alternation of English month names: each full name plus its three-letter
/// abbreviation with an optional trailing dot (`Jan\.?`), and `Sept`.
///
/// The value has no surrounding group; wrap it in parentheses before
/// concatenating it with other pattern text.
pub const MONTH_NAMES: &str = r"January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sep\.?|Sept|October|Oct\.?|November|Nov\.?|December|Dec\.?";

/// Day of month 1-31 as a capture group; a leading `0` or space is allowed
/// before a single digit.
const _DAY: &str = r"([ 0-2]?[1-9]|[1-2]0|3[01])";
/// Month number 1-12 as a capture group; a leading `0` or space is allowed
/// before a single digit.
const _MONTH: &str = r"([ 0]?[1-9]|10|11|12)";
/// Four-digit year (1000-9999) as a capture group.
const _YEAR: &str = r"([1-9]\d{3})";

/// Builds the date pattern from `_DAY`, `_MONTH`, `_YEAR` and `MONTH_NAMES`.
///
/// The result is the text that `DATE` hard-codes; rerun this (see the
/// `date_gen_output` test) and paste the output whenever one of the parts
/// changes, so the two never drift apart.
///
/// Twelve `\b...\b` alternatives are produced:
/// * numeric month/day/year, day/month/year and year/month/day, each with
///   `/`, with no separator, and with `-`;
/// * month name + day + year, day + month name + year, and
///   year + month name + day, where the parts may be separated by `,`,
///   a space, or `, `.
fn _date_regex_generator() -> String {
    format!(
        concat!(
            // Slash-separated.
            r"\b{m}/{d}/{y}\b|",
            r"\b{d}/{m}/{y}\b|",
            r"\b{y}/{m}/{d}\b|",
            // No separator.
            r"\b{m}{d}{y}\b|",
            r"\b{d}{m}{y}\b|",
            r"\b{y}{m}{d}\b|",
            // Dash-separated.
            r"\b{m}-{d}-{y}\b|",
            r"\b{d}-{m}-{y}\b|",
            r"\b{y}-{m}-{d}\b|",
            // Month given by name.
            r"\b({names})(?:,| |, )?{d}(?:,| |, )?{y}\b|",
            r"\b{d}(?:,| |, )?({names})(?:,| |, )?{y}\b|",
            r"\b{y}(?:,| |, )?({names})(?:,| |, )?{d}\b",
        ),
        m = _MONTH,
        d = _DAY,
        y = _YEAR,
        names = MONTH_NAMES,
    )
}

/// Pattern matching a calendar date in any of the layouts listed on
/// `_date_regex_generator`; this is that function's output, spelled out as a
/// constant with one alternative per line.
///
/// Every alternative contributes three capture groups (36 in total), so the
/// groups that participate in a match depend on which layout matched.
pub const DATE: &str = concat!(
    // Slash-separated: M/D/Y, D/M/Y, Y/M/D.
    r"\b([ 0]?[1-9]|10|11|12)/([ 0-2]?[1-9]|[1-2]0|3[01])/([1-9]\d{3})\b|",
    r"\b([ 0-2]?[1-9]|[1-2]0|3[01])/([ 0]?[1-9]|10|11|12)/([1-9]\d{3})\b|",
    r"\b([1-9]\d{3})/([ 0]?[1-9]|10|11|12)/([ 0-2]?[1-9]|[1-2]0|3[01])\b|",
    // No separator: MDY, DMY, YMD.
    r"\b([ 0]?[1-9]|10|11|12)([ 0-2]?[1-9]|[1-2]0|3[01])([1-9]\d{3})\b|",
    r"\b([ 0-2]?[1-9]|[1-2]0|3[01])([ 0]?[1-9]|10|11|12)([1-9]\d{3})\b|",
    r"\b([1-9]\d{3})([ 0]?[1-9]|10|11|12)([ 0-2]?[1-9]|[1-2]0|3[01])\b|",
    // Dash-separated: M-D-Y, D-M-Y, Y-M-D.
    r"\b([ 0]?[1-9]|10|11|12)-([ 0-2]?[1-9]|[1-2]0|3[01])-([1-9]\d{3})\b|",
    r"\b([ 0-2]?[1-9]|[1-2]0|3[01])-([ 0]?[1-9]|10|11|12)-([1-9]\d{3})\b|",
    r"\b([1-9]\d{3})-([ 0]?[1-9]|10|11|12)-([ 0-2]?[1-9]|[1-2]0|3[01])\b|",
    // Month name, day, year.
    r"\b(",
    r"January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sep\.?|Sept|October|Oct\.?|November|Nov\.?|December|Dec\.?",
    r")(?:,| |, )?([ 0-2]?[1-9]|[1-2]0|3[01])(?:,| |, )?([1-9]\d{3})\b|",
    // Day, month name, year.
    r"\b([ 0-2]?[1-9]|[1-2]0|3[01])(?:,| |, )?(",
    r"January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sep\.?|Sept|October|Oct\.?|November|Nov\.?|December|Dec\.?",
    r")(?:,| |, )?([1-9]\d{3})\b|",
    // Year, month name, day.
    r"\b([1-9]\d{3})(?:,| |, )?(",
    r"January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sep\.?|Sept|October|Oct\.?|November|Nov\.?|December|Dec\.?",
    r")(?:,| |, )?([ 0-2]?[1-9]|[1-2]0|3[01])\b",
);
/// Dotted-quad IPv4 address: four octets in the range 0-255, with no
/// superfluous leading zeros, delimited by `\b` on both sides. The pattern
/// has no capture groups.
pub const IPV4: &str = concat!(
    r"\b",
    // Three octets, each followed by a dot.
    r"(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}",
    // Final octet.
    r"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])",
    r"\b",
);
// IP-address patterns exposed as `lazy_static` statics that hold plain
// `&'static str` literals.
lazy_static! {
    // IPv6 address pattern, delimited by `\b` on both sides. The alternatives
    // cover, in order: the full eight-group form; the `::`-compressed forms
    // with one to seven leading groups; forms that begin with `::`; a
    // `fe80:` address followed by `%` and a zone identifier; and the two
    // forms that end in a dotted-quad IPv4 address.
    //
    // NOTE(review): because of the leading `\b`, the alternatives that start
    // with `:` can only match directly after a word character - confirm that
    // this is intended.
    pub static ref IPV6: &'static str = r"\b(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b";
    // Combined pattern: either a dotted-quad IPv4 address or an IPv6 address
    // (with an optional embedded IPv4 tail), the IPv6 side optionally
    // followed by `%` and one or more characters (`(?:%.+)?`). The whole
    // pattern is delimited by `\b` on both sides.
    pub static ref IPV4_6: &'static str = r"\b(?:(?:(?:(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])))|(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:[0-9A-Fa-f]{1,4}|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?:(?::[0-9A-Fa-f]{1,4}){1,2})|:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?:(?:(?::[0-9A-Fa-f]{1,4}){1,3})|(?:(?::[0-9A-Fa-f]{1,4})?:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?:(?:(?::[0-9A-Fa-f]{1,4}){1,4})|(?:(?::[0-9A-Fa-f]{1,4}){0,2}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?:(?:(?::[0-9A-Fa-f]{1,4}){1,5})|(?:(?::[0-9A-Fa-f]{1,4}){0,3}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?:(?:[0-9A-Fa-f]{1,4}:){1}(?:(?:(?::[0-9A-Fa-f]{1,4}){1,6})|(?:(?::[0-9A-Fa-f]{1,4}){0,4}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(?::(?:(?:(?::[0-9A-Fa-f]{1,4}){1,7})|(?:(?::[0-9A-Fa-f]{1,4}){0,5}:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(?:%.+)?))\b";
}
215
#[cfg(test)]
mod tests {
    use crate::{
        collection::*,
        settings::{base::*, group::*},
        EasyRegex,
    };

    // Five runs of Persian letters/digits are expected in the sample sentence.
    #[test]
    fn persian_words_regex_works() {
        let sentence = "سلام شماره من ۱۲۳۶ است";
        let regex = EasyRegex::new_section()
            .list(&PERSIAN_ALPHA_NUMERIC, &ONE_OR_MORE)
            .get_regex()
            .unwrap();

        let match_count = regex.find_iter(sentence).count();
        for found in regex.find_iter(sentence) {
            println!("{}", found.as_str());
        }

        assert_eq!(5, match_count);
    }

    // Each of the three accented French names is matched as one word.
    #[test]
    fn french_words_regex_works() {
        let names = "Adélaïde Aurélie Gaëlle";
        let regex = EasyRegex::new_section()
            .list(&FRENCH_ALPHABET, &ONE_OR_MORE)
            .get_regex()
            .unwrap();

        let word_count = regex.captures_iter(names).count();
        assert_eq!(3, word_count);
    }

    // Each of the three German names with umlauts is matched as one word.
    #[test]
    fn german_words_regex_works() {
        let names = "Müller Sönke Käthe";
        let regex = EasyRegex::new_section()
            .list(&GERMAN_ALPHABET, &ONE_OR_MORE)
            .get_regex()
            .unwrap();

        let word_count = regex.captures_iter(names).count();
        assert_eq!(3, word_count);
    }

    // The Chinese range matches somewhere in the sample text.
    #[test]
    fn chinese_words_regex_works() {
        let sample = "正则表达式";
        let regex = EasyRegex::new_section()
            .list(&CHINESE_ALPHABET, &ONE_OR_MORE)
            .get_regex()
            .unwrap();

        assert!(regex.is_match(sample));
    }

    // Prints the six capture groups of every URL match; there is no
    // assertion, the test only checks that the pattern compiles and runs.
    #[test]
    fn website_url_works() {
        let text = r#"http://www.swimming-pool.co.uk/products/shorts?searchMe=queryMe&name=smith
 something@gmail.com
 www.seasoning.com
 university.gov helloworld.com
 https://javaScript.com
 "#;

        let regex = EasyRegex::new(WEBSITE_URL).get_regex().unwrap();
        for caps in regex.captures_iter(text) {
            // Groups 1..=6, with a placeholder for groups that did not take part.
            let fields: Vec<&str> = (1..=6)
                .map(|index| caps.get(index).map_or("not found", |m| m.as_str()))
                .collect();
            println!(
                "protocol: {}, subdomain: {}, domain name: {}, top level name: {}, directory: {}, query params: {}\n",
                fields[0], fields[1], fields[2], fields[3], fields[4], fields[5],
            );
        }
    }

    // All five times in the sample are matched by the 12-hour pattern.
    #[test]
    fn time_works() {
        let sample = "7:4 5:20 6:30am 02:2 01:30";
        let regex = EasyRegex::new(TIME_HH_MM_12_AMPM).get_regex().unwrap();

        for captures in regex.captures_iter(sample) {
            println!("{:?}", captures);
        }

        let time_count = regex.captures_iter(sample).count();
        assert_eq!(5, time_count);
    }

    // A date, one space, then a 24-hour time: four such pairs are expected.
    #[test]
    fn date_and_time_works() {
        let text = r#"
 Feb 17 2009 5:3am 03/26/1994 8:41 23/7/2030 9:20Pm
 12 Sept 2015 6:14 03-26-1994 2:18 2030/4/27 3:50
 "#;
        let regex = EasyRegex::new_section()
            .group(DATE, &DEFAULT_GROUP)
            .literal_space()
            .group(TIME_HH_MM_24, &DEFAULT_GROUP)
            .get_regex()
            .unwrap();

        for captures in regex.captures_iter(text) {
            println!("{}", captures.get(0).unwrap().as_str());
        }

        let matched_patterns_count = regex.captures_iter(text).count();
        assert_eq!(4, matched_patterns_count);
    }

    // Both full-length IPv6 addresses in the sample are matched.
    #[test]
    fn ip_works() {
        let addresses =
            "2001:0db8:85a3:0000:0000:8a2e:0370:7334 5002:0db8:85a3:0000:0000:8a2e:0560:7334";
        let regex = EasyRegex::new(&IPV6).get_regex().unwrap();

        for captures in regex.captures_iter(addresses) {
            println!("{:?}", captures);
        }

        let address_count = regex.captures_iter(addresses).count();
        assert_eq!(2, address_count);
    }

    // Prints the generated date pattern.
    #[test]
    fn date_gen_output() {
        println!("{}", _date_regex_generator());
    }
}