1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#[macro_use]
extern crate clap;

use colored::{ColoredString, Colorize};
use reqwest::r#async::Response;
use reqwest::{StatusCode, Url};

use select::document::Document;
use select::predicate::Name;
use std::collections::HashSet;
use std::fmt::Display;

use clap::{App, Arg};
/// Prints `x` to standard output in bold red.
///
/// The value is rendered through its `Display` implementation before the
/// styling is applied.
pub fn print_error<T: Display>(x: T) {
    let message = x.to_string();
    println!("{}", message.bold_red());
}
/// Returns `true` when the status is a success or a redirection status.
fn is_valid_status_code(x: StatusCode) -> bool {
    x.is_success() || x.is_redirection()
}
/// Prints a one-line report for a response to standard output.
///
/// Responses whose status passes `is_valid_status_code` are reported in bold
/// green as valid; every other response is reported in bold red as failed.
/// Both forms include the response URL and its status code.
pub fn print_response(x: Response) {
    let status = x.status();
    let target = x.url().as_str();
    let report = if is_valid_status_code(status) {
        format!("{} is valid ({})", target, status.as_str()).bold_green()
    } else {
        format!("{} failed ({})", target, status.as_str()).bold_red()
    };
    println!("{}", report);
}
/// Convenience styling helpers that combine bold with a color.
///
/// This file implements the trait for `str`.
pub trait ColorsExt {
    /// Returns the receiver styled bold and red.
    fn bold_red(&self) -> ColoredString;
    /// Returns the receiver styled bold and green.
    fn bold_green(&self) -> ColoredString;
}
impl ColorsExt for str {
    /// Makes the text bold, then colors it red.
    fn bold_red(&self) -> ColoredString {
        let emphasized = self.bold();
        emphasized.red()
    }
    /// Makes the text bold, then colors it green.
    fn bold_green(&self) -> ColoredString {
        let emphasized = self.bold();
        emphasized.green()
    }
}
/// Builds the clap command-line definition for the program.
///
/// Declares one positional argument (`URL`), one option taking a value
/// (`-p` / `--n_par`) and one flag (`-s` / `--show_ok`).
pub fn make_app<'a, 'b>() -> App<'a, 'b> {
    // Positional argument: the site whose links are checked.
    let url_arg = Arg::with_name("URL")
        .index(1)
        .help("URL to check links for (e.g. http://www.google.com)");

    // Option carrying the requested number of parallel requests.
    let n_par_arg = Arg::with_name("n_par")
        .short("p")
        .long("n_par")
        .value_name("N_PAR")
        .help("Number of parallel requests (Default 100)")
        .takes_value(true);

    // Flag asking for working links to be shown as well.
    let show_ok_arg = Arg::with_name("show_ok")
        .short("s")
        .long("show_ok")
        .help("Show links that are ok");

    App::new("Rusty Links")
        .version(crate_version!())
        .author("Jose Luis Ricon <jose@ricon.xyz>")
        .about("Finds dead links in websites")
        .arg(url_arg)
        .arg(n_par_arg)
        .arg(show_ok_arg)
}
/// Errors returned by `get_links_for_website`.
#[derive(Debug)]
pub enum RustyLinksError {
    /// The supplied string could not be parsed as a URL.
    MalformedUrl,
    /// The HTTP request for the page returned an error.
    RequestError,
}
/// Ensures `url_string` carries an HTTP(S) scheme.
///
/// If the input already starts with `http://` or `https://` it is returned
/// unchanged; otherwise `http://` is prepended. The scheme comparison ignores
/// ASCII case, because URL schemes are case-insensitive: `HTTPS://example.com`
/// must not be turned into `http://HTTPS://example.com`.
fn add_http(url_string: &str) -> String {
    // `str::get` yields `None` when the input is shorter than the scheme or
    // the cut would split a multi-byte character, so this cannot panic.
    let has_scheme = |scheme: &str| {
        url_string
            .get(..scheme.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(scheme))
    };

    if has_scheme("http://") || has_scheme("https://") {
        url_string.to_owned()
    } else {
        ["http://", url_string].concat()
    }
}
/// Fetches a page and returns the set of link targets found in its `<a href>`
/// attributes.
///
/// `url_string` is first passed through `add_http`, so a bare host name is
/// accepted. Each `href` is handled as follows:
///
/// * values starting with `/` (root-relative `/path` and protocol-relative
///   `//host/path`) are resolved against the page URL, so they inherit the
///   page's scheme and, for root-relative links, its host;
/// * values starting with `http` are kept as written;
/// * everything else (fragments, `mailto:`, path-relative links, ...) is
///   skipped.
///
/// If the page responds with a status that is neither success nor
/// redirection, an error line is printed and an empty set is returned.
///
/// # Errors
///
/// * `RustyLinksError::MalformedUrl` if the input cannot be parsed as a URL.
/// * `RustyLinksError::RequestError` if the request fails or the response
///   body cannot be read.
///
/// In both cases the underlying error is printed to standard output first.
pub fn get_links_for_website(url_string: String) -> Result<HashSet<String>, RustyLinksError> {
    let base_url = Url::parse(&add_http(&url_string)).map_err(|e| {
        println!("{:?}", e);
        RustyLinksError::MalformedUrl
    })?;

    // The request consumes its URL; keep `base_url` for resolving links.
    let response = reqwest::get(base_url.clone()).map_err(|e| {
        println!("{:?}", e);
        RustyLinksError::RequestError
    })?;

    let status = response.status();
    if !is_valid_status_code(status) {
        let err = format!("Could not reach website {}: {}", url_string, status);
        print_error(err);
        return Ok(HashSet::new());
    }

    // A failure while reading the body is a request failure, not a panic.
    let document = Document::from_read(response).map_err(|e| {
        println!("{:?}", e);
        RustyLinksError::RequestError
    })?;

    let links = document
        .find(Name("a"))
        .filter_map(|node| node.attr("href"))
        .filter_map(|href| {
            if href.starts_with('/') {
                // Resolving against the page URL replaces the page's own path
                // instead of appending to it; unparseable results are dropped.
                base_url
                    .join(href)
                    .ok()
                    .map(|resolved| resolved.as_str().to_owned())
            } else if href.starts_with("http") {
                Some(href.to_owned())
            } else {
                None
            }
        })
        .collect();

    Ok(links)
}