/// Regular expression pattern that splits a website URL into named capture groups.
///
/// The pattern starts with the `(?m)` (multi-line) flag and defines six named
/// groups, in this order:
///
/// 1. `protocol` - optional; one of `ftp`, `http` or `https`, followed by `://`
///    (the `://` itself is not part of the group).
/// 2. `subdomain` - optional; only the literal `www` is recognised, followed by a dot.
/// 3. `domain_name` - required; 2 to 253 ASCII letters, digits or hyphens.
/// 4. `top_level_name` - required; one or more `.xx` parts, each made of 2 to 6
///    lowercase letters (so `.co.uk` is captured as a whole, leading dots included).
/// 5. `directory` - optional; one or more `/segment` parts, each segment made of
///    lowercase letters and digits only.
/// 6. `query_params` - optional; the text after a `?`, drawn from letters, digits
///    and the characters ``- @ : % _ + ~ # ( ) & / =``.
///
/// Nothing anchors the pattern to the start or end of a line, so it can match
/// anywhere inside a longer text.
pub const WEBSITE_URL: &str = r"(?m)(?:(?:(?P<protocol>ftp|https?)://)?(?:(?P<subdomain>www)\.)?)?(?P<domain_name>[-a-zA-Z0-9]{2,253})(?P<top_level_name>(?:\.[a-z]{2,6})+)(?P<directory>(?:/[a-z0-9]+)+)?(?:\?(?P<query_params>[-a-zA-Z0-9@:%_\+~#()&//=]*))?";
Expand description
Captures the protocol, subdomain, domain name, top-level name, directory and query parameters of every URL found in a multi-line text.
§Examples
// Example: run WEBSITE_URL over a multi-line text and print the six capture
// groups of every match.
use easy_regex::{collection::*, EasyRegex};
// Sample input: a full URL with path and query string, an e-mail address,
// and several bare host names (two of them on the same line).
let text = r#"http://www.swimming-pool.co.uk/products/shorts?searchMe=queryMe&name=smith
something@gmail.com
www.seasoning.com
university.gov helloworld.com
https://javaScript.com
"#;
let result = EasyRegex::new(WEBSITE_URL);
// `unwrap` is used for brevity in this example only.
let captures = result.get_regex().unwrap();
// Visit every match in the text; groups that did not participate in a match
// are printed as "not found".
captures.captures_iter(text).for_each(|caps| {
println!(
"protocol: {}, subdomain: {}, domain name: {}, top level name: {}, directory: {}, query params: {}\n",
// group 1: "protocol"
&caps.get(1).map_or("not found", |m| m.as_str()),
// group 2: "subdomain"
&caps.get(2).map_or("not found", |m| m.as_str()),
// group 3: "domain_name"
&caps.get(3).map_or("not found", |m| m.as_str()),
// group 4: "top_level_name"
&caps.get(4).map_or("not found", |m| m.as_str()),
// group 5: "directory"
&caps.get(5).map_or("not found", |m| m.as_str()),
// group 6: "query_params"
&caps.get(6).map_or("not found", |m| m.as_str()),
);
// will print:
// protocol: http, subdomain: www, domain name: swimming-pool,
// top level name: .co.uk, directory: /products/shorts,
// query params: searchMe=queryMe&name=smith
// protocol: not found, subdomain: www, domain name: seasoning,
// top level name: .com, directory: not found, query params: not found
// protocol: not found, subdomain: not found, domain name: university,
// top level name: .gov, directory: not found, query params: not found
// protocol: https, subdomain: not found, domain name: javaScript,
// top level name: .com, directory: not found, query params: not found
//
// NOTE(review): the pattern is unanchored, so it presumably also matches
// `gmail.com` (inside the e-mail address) and `helloworld.com`; the listing
// above shows only four of the matches - TODO confirm by running the example.
})