1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/// Structure to configure the `Website` crawler.
/// <pre>
/// let mut website: Website = Website::new("https://choosealicense.com");
/// website.configuration.blacklist_url.push("https://choosealicense.com/licenses/".to_string());
/// website.configuration.respect_robots_txt = true;
/// website.configuration.verbose = true;
/// website.crawl();
/// </pre>
#[derive(Debug)]
pub struct Configuration {
    /// Respect the robots.txt file and do not scrape disallowed files (not implemented).
    pub respect_robots_txt: bool,
    /// Print each visited page on standard output.
    pub verbose: bool,
    /// List of pages not to crawl.
    pub blacklist_url: Vec<String>,
    /// User-Agent string (defaults to `spider/1.1.2`).
    pub user_agent: &'static str,
    /// Polite crawling delay in milliseconds (defaults to `0`).
    pub delay: u64,
}

impl Default for Configuration {
    /// Returns the default configuration: both flags disabled, an empty
    /// blacklist, the `spider/1.1.2` user agent and a delay of `0`.
    fn default() -> Self {
        Self {
            respect_robots_txt: false,
            verbose: false,
            blacklist_url: Vec::new(),
            user_agent: "spider/1.1.2",
            delay: 0,
        }
    }
}

impl Configuration {
    /// Creates a configuration with the default settings.
    ///
    /// Equivalent to [`Configuration::default`]; kept as the primary
    /// constructor so existing callers continue to work unchanged.
    pub fn new() -> Self {
        Self::default()
    }
}