scirs2_text/information_extraction/
patterns.rs1#![allow(missing_docs)]
7
8use lazy_static::lazy_static;
9use regex::Regex;
10
11lazy_static! {
12 pub static ref EMAIL_PATTERN: Regex = Regex::new(
14 r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b"
15 ).expect("Operation failed");
16
17 pub static ref URL_PATTERN: Regex = Regex::new(
18 r"https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&/=]*)"
19 ).expect("Operation failed");
20
21 pub static ref PHONE_PATTERN: Regex = Regex::new(
22 r"(?:\+?1[-.\s]?)?\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})"
23 ).expect("Operation failed");
24
25 pub static ref DATE_PATTERN: Regex = Regex::new(
26 r"\b(?:(?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12][0-9]|3[01])[/-](?:19|20)?\d{2})|(?:(?:19|20)\d{2}[/-](?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12][0-9]|3[01]))|(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})|(?:\d{1,2}\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4})\b"
27 ).expect("Operation failed");
28
29 pub static ref TIME_PATTERN: Regex = Regex::new(
30 r"\b(?:[01]?[0-9]|2[0-3]):[0-5][0-9](?::[0-5][0-9])?(?:\s*[aApP][mM])?\b"
31 ).expect("Operation failed");
32
33 pub static ref MONEY_PATTERN: Regex = Regex::new(
34 r"[$€£¥]\s*\d+(?:,\d{3})*(?:\.\d{1,2})?|\d+(?:,\d{3})*(?:\.\d{1,2})?\s*(?:dollars?|euros?|pounds?|yen)"
35 ).expect("Operation failed");
36
37 pub static ref PERCENTAGE_PATTERN: Regex = Regex::new(
38 r"\b\d+(?:\.\d+)?%\b"
39 ).expect("Operation failed");
40}