1pub mod auxiliary;
2pub mod crawler;
3pub mod crawler_builder;
4pub mod handler;
5
6pub use auxiliary::absolute_url;
7pub use crawler::*;
8pub use crawler_builder::*;
9pub use handler::*;
10
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;

    /// End-to-end check of the crawler against the `qiwi-button` plugin tree.
    ///
    /// Builds a crawler with the default propagators, a `qiwi-button`
    /// whitelist and a depth of 3, then records three things while crawling:
    /// every distinct `href` attribute value seen by the `*[href]` handler,
    /// the number of `on_page` callbacks, and the distinct page URLs ending
    /// in `.php`. The final assertions pin those tallies to fixed numbers.
    ///
    /// NOTE(review): the target is an `http://` URL, so this presumably needs
    /// live network access and the expected counts depend on the remote
    /// content staying unchanged — confirm before relying on it in CI.
    #[tokio::test]
    async fn crawl_test() {
        // Tallies filled in by the callbacks registered on the builder below.
        let mut php_pages = HashSet::new();
        let mut hrefs = HashSet::new();
        let mut page_count = 0;

        // Kept as one expression so the closures' mutable borrows of the
        // tallies end with this statement and the tallies can be read below.
        let crawl_errors = Crawler::builder()
            .add_default_propagators()
            .whitelist("qiwi-button")
            .user_agent("Mozilla/5.0 (X11; Linux x86_64)...")
            .add_handler("*[href]", |ctx| {
                // Record the raw `href` value of each matched element.
                if let Some(href) = ctx.element.unwrap().value().attr("href") {
                    hrefs.insert(href.to_string());
                }
            })
            .on_page(|ctx| {
                page_count += 1;
                let url = ctx.page.url.to_string();
                // Only `.php` page URLs are tracked in the set.
                if url.ends_with(".php") {
                    php_pages.insert(url);
                }
            })
            .depth(3)
            .build()
            .unwrap()
            .crawl("http://plugins.svn.wordpress.org/qiwi-button/")
            .await
            .unwrap();

        // Dump the collected data to help diagnose a failing assertion.
        println!("{:?}", php_pages);
        println!("{:?}", hrefs);
        println!("{:?}", crawl_errors);

        assert_eq!(crawl_errors.len(), 0);
        assert_eq!(page_count, 69);
        assert_eq!(php_pages.len(), 18);
        assert_eq!(hrefs.len(), 61);
    }
}