maman 0.13.1

Rust Web Crawler
Documentation
extern crate env_logger;
#[macro_use]
extern crate log;
extern crate mime;
extern crate url;

#[macro_use]
extern crate maman;
extern crate sidekiq;

use std::env;
use std::process;
use std::str::FromStr;

use maman::Spider;
use sidekiq::create_redis_pool;
use url::Url;

const DEFAULT_LIMIT: isize = 0;

fn print_usage() {
    println!(maman_version_string!());
    println!("Usage: maman URL [LIMIT] [MIME_TYPES]");
}

fn fetch_url(url_arg: Option<String>) -> Url {
    match url_arg {
        Some(url) => match Url::parse(url.as_ref()) {
            Ok(u) => u,
            Err(_) => {
                print_usage();
                process::exit(1);
            }
        },
        None => {
            print_usage();
            process::exit(1);
        }
    }
}

fn fetch_limit(limit_arg: Option<String>) -> isize {
    match limit_arg {
        Some(limit) => match limit.parse::<isize>() {
            Err(_) => DEFAULT_LIMIT,
            Ok(l) => l,
        },
        None => DEFAULT_LIMIT,
    }
}

fn fetch_mime_types(mime_types_arg: Option<String>) -> Vec<mime::Mime> {
    let mut mime_types = Vec::new();
    match mime_types_arg {
        Some(mts) => {
            let v: Vec<&str> = mts.split(' ').collect();
            for m in v {
                if let Ok(mime) = mime::Mime::from_str(m) {
                    mime_types.push(mime);
                }
            }
            mime_types
        }
        None => mime_types,
    }
}

fn main() {
    env_logger::init();

    match create_redis_pool() {
        Ok(redis_pool) => {
            let mut spider = Spider::new(
                redis_pool,
                fetch_url(env::args().nth(1)),
                fetch_limit(env::args().nth(2)),
                fetch_mime_types(env::args().nth(3)),
            );
            spider.crawl()
        }
        Err(err) => {
            error!("Redis error: {}", err);
            process::exit(1);
        }
    }
}