rjoin 0.2.0

A tool for joining CSV data on command line.
use std::path::{Path, PathBuf,};
use std::error::Error;
use clap::{App, Arg, ArgGroup, };

pub fn app() -> App<'static, 'static> {
    App::new("rjoin")
        .author(crate_authors!())
        .version(crate_version!())
        .about("joins lines of two files with identical join fields.")
        .arg(Arg::with_name("show_left")
                 .short("l")
                 .long("show-left")
                 .help("print the unmatched lines from the left file"))
        .arg(Arg::with_name("show_right")
                 .short("r")
                 .long("show-right")
                 .help("print the unmatched lines from the right file"))
        .arg(Arg::with_name("show_both")
                 .short("b")
                 .long("show-both")
                 .help("print the matched lines"))
        .group(ArgGroup::with_name("show_any")
                        .args(&["show_left", "show_right", "show_both"])
                        .multiple(true))
        .arg(Arg::with_name("header")
                 .long("header")
                 .help("treat the first line in each file as field headers, print them without trying to pair them"))
        .arg(Arg::with_name("key")
                 .short("k")
                 .long("key")
                 .conflicts_with_all(&["left_key", "right_key"])
                 .takes_value(true)
                 .min_values(1)
                 .value_delimiter(",")
                 .value_name("FIELDS")
                 .help("equivalent to '--left-key=FIELDS --right-key=FIELDS'"))
        .arg(Arg::with_name("left_key")
                 .long("left-key")
                 .requires("right_key")
                 .takes_value(true)
                 .min_values(1)
                 .value_delimiter(",")
                 .value_name("FIELDS")
                 .help("join on these comma-separated fields in the left file")
                 .long_help(
"join on these comma-separated fields in the left file. The index 
starts with one and must not contain duplicates. The default is 1."))
        .arg(Arg::with_name("right_key")
                 .long("right-key")
                 .requires("left_key")
                 .takes_value(true)
                 .min_values(1)
                 .value_delimiter(",")
                 .value_name("FIELDS")
                 .help("join on these comma-separated fields in the right file")
                 .long_help(
"join on these comma-separated fields in the right file. The index 
starts with one and must not contain duplicates. The default is 1."))
        .arg(Arg::with_name("delimiter")
                 .long("delimiter")
                 .short("d")
                 .takes_value(true)
                 .value_name("CHAR")
                 .conflicts_with_all(&["in_delimiter", "out_delimiter"])
                 .help("equivalent to '--in-delimiter=CHAR --out-delimiter=CHAR'"))
        .arg(Arg::with_name("in_delimiter")
                 .long("in-delimiter")
                 .takes_value(true)
                 .value_name("CHAR")
                 .conflicts_with_all(&["in_left_delimiter", "in_right_delimiter"])
                 .requires("out_delimiter")
                 .help("equivalent to '--in-left-delimiter=CHAR --in-right-delimiter=CHAR'"))
        .arg(Arg::with_name("in_left_delimiter")
                 .long("in-left-delimiter")
                 .takes_value(true)
                 .value_name("CHAR")
                 .requires_all(&["in_right_delimiter", "out_delimiter"])
                 .help("use CHAR as input field delimiter for the left file")
                 .long_help(
"use CHAR as input field delimiter for left file. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("in_right_delimiter")
                 .long("in-right-delimiter")
                 .takes_value(true)
                 .value_name("CHAR")
                 .requires_all(&["in_left_delimiter", "out_delimiter"])
                 .help("use CHAR as input field delimiter for the right file")
                 .long_help(
"use CHAR as input field delimiter for the right file. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("out_delimiter")
                 .long("out-delimiter")
                 .takes_value(true)
                 .value_name("CHAR")
                 .help("use CHAR as output field delimiter")
                 .long_help(
"use CHAR as output field delimiter. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("terminator")
                 .long("terminator")
                 .short("t")
                 .takes_value(true)
                 .value_name("CHAR")
                 .conflicts_with_all(&["in_terminator", "out_terminator"])
                 .help("equivalent to '--in-terminator=CHAR --out-terminator=CHAR'"))
        .arg(Arg::with_name("in_terminator")
                 .long("in-terminator")
                 .takes_value(true)
                 .value_name("CHAR")
                 .requires("out_terminator")
                 .conflicts_with_all(&["in_left_terminator", "in_right_terminator"])
                 .help("equivalent to '--in-left-terminator=CHAR --in-right-terminator=CHAR'"))
        .arg(Arg::with_name("in_left_terminator")
                 .long("in-left-terminator")
                 .takes_value(true)
                 .value_name("CHAR")
                 .requires_all(&["in_right_terminator", "out_terminator"])
                 .help("use CHAR as input record terminator for the left file")
                 .long_help(
"use CHAR as input record terminator for left file. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("in_right_terminator")
                 .long("in-right-terminator")
                 .takes_value(true)
                 .value_name("CHAR")
                 .requires_all(&["in_left_terminator", "out_terminator"])
                 .help("use CHAR as input record terminator for the right file")
                 .long_help(
"use CHAR as input record terminator for right file. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("out_terminator")
                 .long("out-terminator")
                 .takes_value(true)
                 .value_name("CHAR")
                 .help("use CHAR as output record terminator")
                 .long_help(
"use CHAR as output record terminator. It must be 1 byte long in utf-8."))
        .arg(Arg::with_name("LEFT_FILE")
                 .help("the left input file")
                 .required(true)
                 .index(1))
        .arg(Arg::with_name("RIGHT_FILE")
                 .help("the right input file")
                 .required(true)
                 .index(2))
}

pub struct Args {
    left_path: PathBuf,
    right_path: PathBuf,
    show_left: bool,
    show_right: bool,
    show_both: bool,
    left_key: Vec<usize>,
    right_key: Vec<usize>,
    in_left_delimiter: u8,
    in_right_delimiter: u8,
    out_delimiter: u8,
    in_left_terminator: u8,
    in_right_terminator: u8,
    out_terminator: u8,
    header: bool,
}

impl Args {
    pub fn parse() -> Result<Args, Box<Error>> {
        let matches = app().get_matches();

        let left_path = matches.value_of("LEFT_FILE").ok_or("expected LEFT_FILE")?;
        let right_path = matches.value_of("RIGHT_FILE").ok_or("expected RIGHT_FILE")?;

        let show_left = matches.is_present("show_left");
        let show_right = matches.is_present("show_right");
        let show_both = !matches.is_present("show_any") || matches.is_present("show_both");

        let header = matches.is_present("header");

        let key: Vec<usize> = match matches.values_of("key").map(|it| it.collect::<Vec<_>>()) {
            Some(v) => validate_key(v, "")?,
            None => vec![0],
        };
        let left_key: Vec<usize> = match matches.values_of("left_key")
                                                .map(|it| it.collect::<Vec<_>>()) {
            Some(v) => validate_key(v, "left ")?,
            None => key.clone(),
        };
        let right_key: Vec<usize> = match matches.values_of("right_key")
                                                 .map(|it| it.collect::<Vec<_>>()) {
            Some(v) => validate_key(v, "right ")?,
            None => key.clone(),
        };

        if left_key.len() != right_key.len() {
            return Err("the left key and the right key parameters have different lenght".into());
        }

        let delimiter = match matches.value_of("delimiter")
                                     .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the field delimiter must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => b','
        };
        let in_delimiter = match matches.value_of("in_delimiter")
                                        .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the input field delimiter must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => delimiter
        };
        let out_delimiter = match matches.value_of("out_delimiter")
                                         .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the output field delimiter must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => delimiter
        };
        let in_left_delimiter = match matches.value_of("in_left_delimiter")
                                             .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the left input field delimiter must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => in_delimiter
        };
        let in_right_delimiter = match matches.value_of("in_right_delimiter")
                                             .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the right input field delimiter must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => in_delimiter
        };

        let terminator = match matches.value_of("terminator")
                                   .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the record terminator must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => b'\n'
        };
        let in_terminator = match matches.value_of("in_terminator")
                                      .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the input record terminator must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => terminator
        };
        let out_terminator = match matches.value_of("out_terminator")
                                       .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the output record terminator must be 1 byte long in utf8".into());
                }
                b[0]
            }
            None => terminator
        };
        let in_left_terminator = match matches.value_of("in_left_terminator")
                                           .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the left input record terminator must be 1 byte long in \
                    utf8".into());
                }
                b[0]
            }
            None => in_terminator
        };
        let in_right_terminator = match matches.value_of("in_right_terminator")
                                            .map(|s| s.as_bytes()) {
            Some(b) => {
                if b.len() > 1 {
                    return Err("the right input record terminator must be 1 byte long in \
                    utf8".into());
                }
                b[0]
            }
            None => in_terminator
        };


        let args = Args { 
            left_path: left_path.into(),
            right_path: right_path.into(),
            show_left: show_left,
            show_right: show_right,
            show_both: show_both,
            left_key: left_key,
            right_key: right_key,
            in_left_delimiter: in_left_delimiter,
            in_right_delimiter: in_right_delimiter,
            out_delimiter: out_delimiter,
            in_left_terminator: in_left_terminator,
            in_right_terminator: in_right_terminator,
            out_terminator: out_terminator,
            header: header,
        };
        Ok(args)
    }
    pub fn left_path(&self) -> &Path {
        &self.left_path
    }
    pub fn right_path(&self) -> &Path {
        &self.right_path
    }
    pub fn show_left(&self) -> bool {
        self.show_left
    }
    pub fn show_right(&self) -> bool {
        self.show_right
    }
    pub fn show_both(&self) -> bool {
        self.show_both
    }
    pub fn left_key(&self) -> &[usize] {
        &self.left_key
    }
    pub fn right_key(&self) -> &[usize] {
        &self.right_key
    }
    pub fn in_left_delimiter(&self) -> u8 {
        self.in_left_delimiter
    }
    pub fn in_right_delimiter(&self) -> u8 {
        self.in_right_delimiter
    }
    pub fn out_delimiter(&self) -> u8 {
        self.out_delimiter
    }
    pub fn in_left_terminator(&self) -> u8 {
        self.in_left_terminator
    }
    pub fn in_right_terminator(&self) -> u8 {
        self.in_right_terminator
    }
    pub fn out_terminator(&self) -> u8 {
        self.out_terminator
    }
    pub fn header(&self) -> bool {
        self.header
    }
}

        
fn validate_key(k: Vec<&str>, which: &str) -> Result<Vec<usize>, Box<Error>> {
    let out = Ok(k)
        .map(|v| v.iter().map(|s| s.parse::<usize>())
                         .collect::<Vec<_>>())
        .and_then(|v| {
            let mut out: Vec<usize> = Vec::with_capacity(v.len());
            for (x, r) in v.iter().enumerate() {
                match *r {
                    Ok(i) => out.push(i),
                    Err(_) => return Err(format!("could not parse the {}key parameter at \
                                                  the position {}", which, x + 1).into()),
                }
            }
            Ok(out)
        })
        .and_then(|mut v| {
            if v.iter().any(|&i| i < 1) {
                return Err("the key fields must use 1-based numbering".into());
            }
            for i in v.iter_mut() {
               *i -= 1;
            }
            Ok(v)
        });
    out
}