//! scraper 0.8.1
//!
//! HTML parsing and querying with CSS selectors
extern crate getopts;
extern crate scraper;

use getopts::Options;
use scraper::{Html, Selector};
use std::env;
use std::fs::{self, File};
use std::io::{self, Read};
use std::path::PathBuf;
use std::process;

/// How the input text is handed to the HTML parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Input {
    /// Parse the input as a complete HTML document.
    Document,
    /// Parse the input as an HTML fragment.
    Fragment,
}

/// Which facet of each matching element is printed.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Output {
    /// The element's own HTML.
    Html,
    /// The element's inner HTML.
    InnerHtml,
    /// The value of the named attribute.
    Attr(String),
    /// The element's classes.
    Classes,
    /// The element's ID.
    Id,
    /// The element's name.
    Name,
    /// The element's text.
    Text,
}

fn query<T: Read>(input: &Input, output: &Output, selector: &Selector, file: &mut T) -> bool {
    let mut html = String::new();
    file.read_to_string(&mut html).unwrap();

    let html = match *input {
        Input::Document => Html::parse_document(&html),
        Input::Fragment => Html::parse_fragment(&html),
    };

    let mut matched = false;
    for element in html.select(selector) {
        use Output::*;
        match *output {
            Html => println!("{}", element.html()),
            InnerHtml => println!("{}", element.inner_html()),
            Attr(ref attr) => println!("{}", element.value().attr(attr).unwrap_or("")),
            Classes => println!("{}", element.value().classes().collect::<Vec<_>>().join(" ")),
            Id => println!("{}", element.value().id().unwrap_or("")),
            Name => println!("{}", element.value().name()),
            Text => println!("{}", element.text().collect::<String>()),
        }
        matched = true;
    }
    matched
}

/// Prints `message` to standard error, prefixed with the program name, and
/// terminates the process with exit status 2.
///
/// Status 2 keeps failures distinguishable from "nothing matched" (status 1).
fn fail(message: &str) -> ! {
    eprintln!("scraper: {}", message);
    process::exit(2)
}

/// Command-line entry point: `scraper [options] SELECTOR [FILE ...]`.
///
/// Parses the options, then runs the selector against every listed file, or
/// against standard input when no file is given.
///
/// Exit status:
/// * `0` - at least one element matched (also used after `--help` and a
///   successful `--install-man-page`),
/// * `1` - no element matched,
/// * `2` - bad usage, invalid selector, or an I/O failure while opening a file
///   or installing the man page.
fn main() {
    let mut opts = Options::new();
    opts.optflag("H", "html", "output HTML of elements");
    opts.optflag("I", "inner-html", "output inner HTML of elements");
    opts.optopt("a", "attr", "output attribute value of elements", "ATTR");
    opts.optflag("c", "classes", "output classes of elements");
    opts.optflag("d", "document", "parse input as HTML documents");
    opts.optflag("f", "fragment", "parse input as HTML fragments");
    opts.optflag("i", "id", "output ID of elements");
    opts.optflag("n", "name", "output name of elements");
    opts.optflag("t", "text", "output text of elements");
    opts.optflag("h", "help", "this cruft");
    opts.optopt("", "install-man-page", "install real documentation", "PATH");

    // Skip the program name instead of slicing, so an empty argv cannot panic.
    let args: Vec<String> = env::args().skip(1).collect();
    let matches = opts
        .parse(&args)
        .unwrap_or_else(|err| fail(&err.to_string()));

    if matches.opt_present("h") {
        print!("{}", opts.usage("Usage: scraper [options] SELECTOR [FILE ...]"));
        return;
    }

    if let Some(dir) = matches.opt_str("install-man-page") {
        // The page is always written to a `man1` directory; one is appended
        // unless the given path already ends with it.
        let mut path = PathBuf::from(dir);
        if !path.ends_with("man1") {
            path.push("man1");
        }
        if let Err(err) = fs::create_dir_all(&path) {
            fail(&format!("cannot create {}: {}", path.display(), err));
        }
        path.push("scraper.1");
        if let Err(err) = fs::write(&path, &include_bytes!("../scraper.1")[..]) {
            fail(&format!("cannot write {}: {}", path.display(), err));
        }
        return;
    }

    // Document parsing is the default; `-d` only states it explicitly.
    let input = if matches.opt_present("f") {
        Input::Fragment
    } else {
        Input::Document
    };

    // When several output flags are given, the first one in this chain wins.
    // HTML is the default, so `-H` needs no branch of its own.
    let output = if matches.opt_present("I") {
        Output::InnerHtml
    } else if let Some(attr) = matches.opt_str("a") {
        Output::Attr(attr)
    } else if matches.opt_present("c") {
        Output::Classes
    } else if matches.opt_present("i") {
        Output::Id
    } else if matches.opt_present("n") {
        Output::Name
    } else if matches.opt_present("t") {
        Output::Text
    } else {
        Output::Html
    };

    // First free argument is the selector, the rest are input files.
    let (selector_text, files) = match matches.free.split_first() {
        Some(parts) => parts,
        None => fail("missing selector"),
    };

    let selector = Selector::parse(selector_text).unwrap_or_else(|err| {
        fail(&format!("invalid selector {:?}: {:?}", selector_text, err))
    });

    let matched = if files.is_empty() {
        query(&input, &output, &selector, &mut io::stdin())
    } else {
        let mut any_matched = false;
        for path in files {
            let mut file = File::open(path)
                .unwrap_or_else(|err| fail(&format!("{}: {}", path, err)));
            // Deliberately not short-circuited: every file is queried and
            // printed even after an earlier file has already matched.
            any_matched |= query(&input, &output, &selector, &mut file);
        }
        any_matched
    };

    process::exit(if matched { 0 } else { 1 });
}