1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
//! Utility functions use regex::Regex; use url::{Url, ParseError}; use reqwest::Client; /// This function parses an incomplete URL /// and returns the full URL. /// /// ## Usage: /// /// ``` /// use clean_url::utils::parse_url; /// /// assert_eq!(Some(String::from("http://example.com/")), parse_url(String::from("example.com"), false)); /// assert_eq!(Some(String::from("https://example.com/")), parse_url(String::from("example.com"), true)); /// assert_eq!(Some(String::from("http://www.example.com/")), parse_url(String::from("www.example.com"), false)); /// assert_eq!(Some(String::from("https://www.example.com/")), parse_url(String::from("www.example.com"), true)); /// ``` pub fn parse_url(url: String, is_secure: bool) -> Option<String> { match Url::parse(url.as_str()) { Ok(u) => { println!("{:?}", u); Some(u.into_string()) } Err(e) => { println!("{:?}", e); match e { ParseError::RelativeUrlWithoutBase => { if is_secure { parse_url(format!("https://{}", url), is_secure) } else { parse_url(format!("http://{}", url), is_secure) } } _ => { None } } } } } /// This function checks the status code /// of a URL and returns the version of the /// URL that is a 200 series. /// /// ## Usage: /// /// ``` /// use clean_url::utils::check_status; /// use tokio_test::block_on; /// /// assert_eq!(block_on(check_status("https://httpbin.org/status/200")), Some(String::from("https://httpbin.org/status/200"))); /// assert_eq!(block_on(check_status("http://www.bertsmithco.com")), Some(String::from("https://bertsmithco.com/"))); /// assert_eq!(block_on(check_status("https://www.bertsmithco.com")), Some(String::from("https://bertsmithco.com/"))); /// assert_eq!(block_on(check_status("https://www.jaredforthmusic.com")), Some(String::from("https://jaredforthmusic.com/"))); /// assert_eq!(block_on(check_status("http://www.jaredforthmusic.com")), Some(String::from("https://jaredforthmusic.com/"))); /// ``` pub async fn check_status(url: &str) -> Option<String> { let client = Client::new(); match client.get(url).send().await { Ok(r) => { let code = r.status(); let resp_url = r.url(); println!("{:?} - {:?}", code, resp_url); if code.is_success() { Some(resp_url.to_string()) } else { println!("{:?}", r); match r.headers().get("server") { Some(server) => { let server_name = server.to_str().unwrap(); if server_name == "Squarespace" { // There is no hope, just return the URL Some(resp_url.to_string()) } else { None } } None => None } } } Err(e) => { println!("{:?}", e); None } } } lazy_static! { static ref WWW_RE: Regex = Regex::new(r"www\.").unwrap(); static ref HTTP_RE: Regex = Regex::new(r"(?P<http>https?://)").unwrap(); } /// Removes www if a URL has it, and /// adds www if a URL does not /// /// ## Usage: /// /// ``` /// use clean_url::utils::swap_www; ///use tokio_test::block_on; /// /// assert_eq!(String::from("http://www.example.com"), block_on(swap_www("http://example.com"))); /// assert_eq!(String::from("http://example.com"), block_on(swap_www("http://www.example.com"))); /// ``` pub async fn swap_www(url: &str) -> String { if has_www(url).await { remove_www(url).await } else { add_www(url).await } } /// Checks if a URL uses www /// /// ## Usage: /// /// ``` /// use clean_url::utils::has_www; /// use tokio_test::block_on; /// /// assert_eq!(true, block_on(has_www("http://www.example.com"))); /// assert_eq!(false, block_on(has_www("http://example.com"))); /// /// //assert_eq!(String::from("http://www.example.com"), swap_www("http://example.com")); /// //assert_eq!(String::from("http://example.com"), swap_www("http://www.example.com")); /// ``` pub async fn has_www(url: &str) -> bool { WWW_RE.is_match(url) } /// Removes www from a URL /// /// ## Usage: /// ``` /// use clean_url::utils::remove_www; /// use tokio_test::block_on; /// /// assert_eq!(String::from("http://example.com"), block_on(remove_www("http://www.example.com"))); /// ``` pub async fn remove_www(url: &str) -> String { WWW_RE.replace_all(url, "").to_string() } /// Adds www to a URL /// /// ## Usage: /// ``` /// use clean_url::utils::add_www; /// use tokio_test::block_on; /// /// assert_eq!(String::from("http://www.example.com"), block_on(add_www("http://example.com"))); /// ``` pub async fn add_www(url: &str) -> String { if !has_www(url).await { HTTP_RE.replace_all(url, "$http-www.").to_string().replace("-", "") } else { println!("Already has www"); String::from(url) } }