extern crate docopt;
extern crate env_logger;
#[macro_use]
extern crate log;

extern crate html5ever;
extern crate url;

extern crate cargo_metadata;
extern crate num_cpus;
extern crate rayon;
extern crate reqwest;
extern crate serde_json;
extern crate walkdir;

use std::{
    fmt,
    path::{Path, PathBuf},
};

use rayon::prelude::*;
use walkdir::{DirEntry, WalkDir};

use check::is_available;
use parse::parse_html_file;

pub use check::{CheckError, HttpError};

mod check;
mod parse;

/// Options controlling how links are checked.
#[derive(Debug)]
pub struct CheckContext {
    /// Whether `http(s)` links should be checked as well as local files.
    pub check_http: bool,
}

/// All the broken-link errors found in a single HTML file.
#[derive(Debug)]
pub struct FileError {
    pub path: PathBuf,
    pub errors: Vec<CheckError>,
}

impl fmt::Display for FileError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Found invalid urls in {}:", self.path.display())?;
        for err in &self.errors {
            writeln!(f)?;
            write!(f, "\t{}", err)?;
        }
        Ok(())
    }
}

/// Returns true if the directory entry has an `html` extension.
fn is_html_file(entry: &DirEntry) -> bool {
    match entry.path().extension() {
        Some(e) => e.to_str().map(|ext| ext == "html").unwrap_or(false),
        None => false,
    }
}

/// Walks `dir_path` in parallel, parses every HTML file found, and yields a
/// `FileError` for each file containing at least one unavailable URL.
pub fn unavailable_urls<'a>(
    dir_path: &'a Path,
    ctx: &'a CheckContext,
) -> impl ParallelIterator<Item = FileError> + 'a {
    WalkDir::new(dir_path)
        .into_iter()
        // Bridge the sequential directory walk onto rayon's thread pool.
        .par_bridge()
        // Skip entries that could not be read.
        .filter_map(|e| e.ok())
        // Only check regular files with an `.html` extension.
        .filter(|entry| entry.file_type().is_file() && is_html_file(entry))
        .flat_map(move |entry| {
            let urls = parse_html_file(entry.path());
            // Keep only the URLs that failed the availability check.
            let errors = urls
                .into_iter()
                .filter_map(|url| match is_available(&url, ctx) {
                    Ok(()) => None,
                    Err(err) => Some(err),
                })
                .collect::<Vec<_>>();

            // Yield a `FileError` only for files that actually had failures;
            // `Option` is a parallel iterator of zero or one items here.
            if errors.is_empty() {
                None
            } else {
                Some(FileError {
                    path: entry.path().to_owned(),
                    errors,
                })
            }
        })
}
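
// A minimal usage sketch, not part of the original source: drive
// `unavailable_urls` over a rustdoc output directory and print each file's
// errors. The `target/doc` path and `check_http: false` setting are
// illustrative assumptions; the real binary would wire these up from its
// command-line arguments and cargo metadata.
#[allow(dead_code)]
fn usage_sketch() -> bool {
    let ctx = CheckContext { check_http: false };
    // `collect` comes from rayon's `ParallelIterator`, already in scope via
    // `rayon::prelude::*`.
    let errors: Vec<FileError> = unavailable_urls(Path::new("target/doc"), &ctx).collect();
    for file_error in &errors {
        println!("{}", file_error);
    }
    errors.is_empty()
}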