1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
//! Utility functions for completing, probing, and normalizing URLs.


use regex::Regex;

use url::{Url, ParseError};
use reqwest::Client;

/// Completes a possibly scheme-less URL and returns its serialized form.
///
/// The input is handed to `Url::parse`. On success the serialized URL is
/// returned. If parsing fails with `ParseError::RelativeUrlWithoutBase`,
/// `https://` (when `is_secure` is true) or `http://` (otherwise) is
/// prepended and the result is parsed again. Any other parse error yields
/// `None`.
///
/// The parsed URL, and any parse error encountered, are printed to stdout
/// in `Debug` form.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::parse_url;
///
/// assert_eq!(Some(String::from("http://example.com/")), parse_url(String::from("example.com"), false));
/// assert_eq!(Some(String::from("https://example.com/")), parse_url(String::from("example.com"), true));
/// assert_eq!(Some(String::from("http://www.example.com/")), parse_url(String::from("www.example.com"), false));
/// assert_eq!(Some(String::from("https://www.example.com/")), parse_url(String::from("www.example.com"), true));
/// ```
pub fn parse_url(url: String, is_secure: bool) -> Option<String> {
    let failure = match Url::parse(&url) {
        Ok(parsed) => {
            println!("{:?}", parsed);
            return Some(parsed.as_str().to_owned());
        }
        Err(failure) => failure,
    };
    println!("{:?}", failure);

    // Only a missing scheme is recoverable: retry with the requested scheme in front.
    if let ParseError::RelativeUrlWithoutBase = failure {
        let scheme = if is_secure { "https" } else { "http" };
        parse_url(format!("{}://{}", scheme, url), is_secure)
    } else {
        None
    }
}

/// Requests `url` with a GET and returns the URL reported by the response
/// when the request is considered successful.
///
/// Returns `Some(response_url)` when the response status is in the 2xx
/// range, or when it is not but the `server` response header is exactly
/// `Squarespace`. Returns `None` for any other non-2xx response and when
/// the request itself fails. The returned URL comes from the response, so
/// it may differ from the input (see the examples below).
///
/// A `server` header that is not valid visible ASCII is treated as
/// "not Squarespace" instead of panicking.
///
/// The status, the response URL, non-2xx responses and request errors are
/// printed to stdout in `Debug` form.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::check_status;
/// use tokio_test::block_on;
///
/// assert_eq!(block_on(check_status("https://httpbin.org/status/200")), Some(String::from("https://httpbin.org/status/200")));
/// assert_eq!(block_on(check_status("http://www.bertsmithco.com")), Some(String::from("https://bertsmithco.com/")));
/// assert_eq!(block_on(check_status("https://www.bertsmithco.com")), Some(String::from("https://bertsmithco.com/")));
/// assert_eq!(block_on(check_status("https://www.jaredforthmusic.com")), Some(String::from("https://jaredforthmusic.com/")));
/// assert_eq!(block_on(check_status("http://www.jaredforthmusic.com")), Some(String::from("https://jaredforthmusic.com/")));
/// ```
pub async fn check_status(url: &str) -> Option<String> {
    // NOTE(review): a fresh client is built on every call; callers probing
    // many URLs may want a shared client, which this signature cannot accept.
    let client = Client::new();
    let resp = match client.get(url).send().await {
        Ok(resp) => resp,
        Err(e) => {
            println!("{:?}", e);
            return None;
        }
    };

    let code = resp.status();
    let resp_url = resp.url();
    println!("{:?} - {:?}", code, resp_url);
    if code.is_success() {
        return Some(resp_url.to_string());
    }

    println!("{:?}", resp);
    // The header value is remote input: `to_str` fails on bytes outside
    // visible ASCII, so treat that as a non-match rather than unwrapping.
    let served_by_squarespace = resp
        .headers()
        .get("server")
        .and_then(|value| value.to_str().ok())
        .map_or(false, |name| name == "Squarespace");

    if served_by_squarespace {
        // Squarespace-served sites are accepted despite the non-2xx status;
        // the response URL is returned as-is.
        Some(resp_url.to_string())
    } else {
        None
    }
}

// Shared regular expressions. Each pattern is compiled via `Regex::new`;
// the `unwrap` panics if a pattern literal is ever made invalid.
lazy_static! {
    // Matches the literal text `www.` at any position in the input
    // (the pattern is not anchored to the start or to the host).
    static ref WWW_RE: Regex = Regex::new(r"www\.").unwrap();
    // Matches `http://` or `https://` at any position and captures the
    // matched prefix in the named group `http`.
    static ref HTTP_RE: Regex = Regex::new(r"(?P<http>https?://)").unwrap();
}

/// Toggles the `www.` prefix of a URL.
///
/// When `has_www` reports that the URL uses `www`, the result of
/// `remove_www` is returned; otherwise the result of `add_www` is returned.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::swap_www;
/// use tokio_test::block_on;
///
/// assert_eq!(String::from("http://www.example.com"), block_on(swap_www("http://example.com")));
/// assert_eq!(String::from("http://example.com"), block_on(swap_www("http://www.example.com")));
/// ```
pub async fn swap_www(url: &str) -> String {
    match has_www(url).await {
        true => remove_www(url).await,
        false => add_www(url).await,
    }
}

/// Checks whether the host of a URL starts with `www.`.
///
/// Only the host is inspected: an optional leading `scheme://` and an
/// optional `userinfo@` part are skipped, and anything from the first `/`,
/// `?` or `#` onwards is ignored. A `www.` that appears only in the path or
/// query therefore does not count. Input without a scheme is treated as
/// starting directly at the host. The comparison is case-sensitive.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::has_www;
/// use tokio_test::block_on;
///
/// assert_eq!(true, block_on(has_www("http://www.example.com")));
/// assert_eq!(false, block_on(has_www("http://example.com")));
/// assert_eq!(false, block_on(has_www("http://example.com/www.html")));
/// ```
pub async fn has_www(url: &str) -> bool {
    // Treat the text before the first "://" as a scheme only if it consists
    // solely of scheme characters; otherwise that "://" belongs to a path or
    // query and the input starts at the host.
    let after_scheme = match url.find("://") {
        Some(idx)
            if url[..idx]
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.') =>
        {
            &url[idx + 3..]
        }
        _ => url,
    };

    // The authority ends at the first path, query or fragment delimiter.
    let authority_end = after_scheme
        .find(|c: char| c == '/' || c == '?' || c == '#')
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_end];

    // Skip `user:password@` if present so only the host is examined.
    let host = authority
        .rfind('@')
        .map_or(authority, |at| &authority[at + 1..]);

    host.starts_with("www.")
}

/// Removes a leading `www.` from the host of a URL.
///
/// Only a `www.` at the very start of the host is removed; an optional
/// leading `scheme://` and `userinfo@` are kept, and occurrences of `www.`
/// in the path, query or fragment are left untouched. If the host does not
/// start with `www.` the input is returned unchanged. The comparison is
/// case-sensitive.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::remove_www;
/// use tokio_test::block_on;
///
/// assert_eq!(String::from("http://example.com"), block_on(remove_www("http://www.example.com")));
/// assert_eq!(String::from("http://example.com/www.html"), block_on(remove_www("http://www.example.com/www.html")));
/// ```
pub async fn remove_www(url: &str) -> String {
    // Treat the text before the first "://" as a scheme only if it consists
    // solely of scheme characters; otherwise the input starts at the host.
    let scheme_end = match url.find("://") {
        Some(idx)
            if url[..idx]
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.') =>
        {
            idx + 3
        }
        _ => 0,
    };

    // The authority ends at the first path, query or fragment delimiter;
    // the host begins after an optional `userinfo@`.
    let after_scheme = &url[scheme_end..];
    let authority_len = after_scheme
        .find(|c: char| c == '/' || c == '?' || c == '#')
        .unwrap_or(after_scheme.len());
    let host_start = scheme_end
        + after_scheme[..authority_len]
            .rfind('@')
            .map_or(0, |at| at + 1);

    let (head, tail) = url.split_at(host_start);
    if tail.starts_with("www.") {
        format!("{}{}", head, &tail["www.".len()..])
    } else {
        String::from(url)
    }
}

/// Adds `www.` to a URL, directly after its `http://` or `https://` prefix.
///
/// If `has_www` already reports `www` for the URL, a notice is printed to
/// stdout and the input is returned unchanged. Otherwise `www.` is inserted
/// after the first `http://` / `https://` found in the input; the rest of
/// the URL (including hyphens and any URLs embedded in the query) is left
/// exactly as given. Input without an `http://` or `https://` prefix is
/// returned unchanged.
///
/// ## Usage:
///
/// ```
/// use clean_url::utils::add_www;
/// use tokio_test::block_on;
///
/// assert_eq!(String::from("http://www.example.com"), block_on(add_www("http://example.com")));
/// assert_eq!(String::from("http://www.my-site.com"), block_on(add_www("http://my-site.com")));
/// ```
pub async fn add_www(url: &str) -> String {
    if has_www(url).await {
        // NOTE(review): diagnostic goes to stdout; kept for existing callers.
        println!("Already has www");
        return String::from(url);
    }

    // `${http}` delimits the group name explicitly, so no placeholder
    // character has to be inserted and stripped afterwards (which used to
    // delete every hyphen in the URL). `replace` touches only the first
    // match, leaving URLs embedded later in the string alone.
    HTTP_RE.replace(url, "${http}www.").into_owned()
}