//! csplit 0.1.0
//!
//! A clone of the Unix coreutils `csplit` utility.
extern crate regex;
#[allow(unused_imports)]
#[macro_use]
extern crate structopt;
use structopt::StructOpt;
#[macro_use]
extern crate lazy_static;

mod errors;
mod iter;
mod patterns;
#[cfg(test)]
mod tests;
use self::{
    errors::{Error, Result},
    iter::Chunks,
    patterns::Pattern,
};
use std::ffi::OsString;

/// Program entry point.
///
/// Parses the command line into an `Opt`, hands it to `csplit`, and exits
/// with status 0 on success. On failure the error is printed to standard
/// error and the process exits with status 1.
fn main() {
    let outcome = Opt::from_args().and_then(csplit);
    let status = match outcome {
        Err(err) => {
            eprintln!("{}", err);
            1
        },
        Ok(()) => 0,
    };
    std::process::exit(status);
}
///  #### csplit
///  the csplit utility splits file into pieces using the patterns args.  If file is a dash (`'-'`), csplit
///  reads from standard input.
///  The options are as follows:
///
///  ##### **-f** prefix
///  Give created files names beginning with prefix.  The default is **"xx"**.
///
///  ##### **-k**      
///  Do not remove output files if an error occurs or a HUP, INT or TERM signal is received.
///
///  ##### -n number
///  Use number of decimal digits after the prefix to form the file name.  The default is 2.
///
///  ##### -s     
/// Do not write the size of each output file to standard output as it is created.
///
///  The args operands may be a combination of the following patterns:
///  ##### /regexp/[[+|-]offset]
///          Create a file containing the input from the current line to (but not including) the next line
///          matching the given basic regular expression.  An optional offset from the line that matched may
///          be specified.
///
///  ##### %regexp%[[+|-]offset]
///          Same as above but a file is not created for the output.
///
///  ##### line_no 
///          Create containing the input from the current line to (but not including) the specified line
///          number.
///
///  ##### {num}   
/// Repeat the previous pattern the specified number of times.  If it follows a line number pat
/// tern, a new file will be created for each line_no lines, num times.  The first line of the file
/// is line number 1 for historic reasons.
///
///  After all the patterns have been processed, the remaining input data (if there is any) will be written
///  to a new file.
pub fn csplit(opts: Opt) -> Result<()> {
    let Opt {
        text,
        formatter,
        patterns,
        cleanup_after_error,
        write_file_sizes_to_stdout,
    } = opts;
    let mut written = Vec::new();
    for (i, chunk) in Chunks::new(&text, patterns).unwrap().enumerate() {
        let fp = formatter.path(i);
        if write_file_sizes_to_stdout {
            println!("{}: {:05} bytes", &fp, chunk.len())
        }
        match std::fs::write(&fp, chunk) {
            Err(err) => {
                if cleanup_after_error {
                    for fp in written {
                        // deliberately ignored
                        let _ = std::fs::remove_file(fp);
                    }
                }
                return Err(Error::from(err));
            },
            Ok(()) => written.push(fp),
        }
    }
    Ok(())
}

/// Read the whole input into a `String`.
///
/// `fp` is the file operand from the command line. A lone dash (`-`) selects
/// standard input; anything else is treated as the path of a file to read.
///
/// # Errors
///
/// Returns the underlying I/O error if the source cannot be opened or read,
/// or if its contents are not valid UTF-8.
fn read_input(fp: &OsString) -> Result<String> {
    use std::io::Read;

    // Regular file: let the standard library open and slurp it in one call.
    if fp != "-" {
        return Ok(std::fs::read_to_string(fp)?);
    }

    // "-" means standard input; lock it once and drain it to the end.
    let mut buffer = String::new();
    std::io::stdin().lock().read_to_string(&mut buffer)?;
    Ok(buffer)
}

/// The formatter for the output file names.
///
/// An output name is the `prefix` followed by the piece's index, zero-padded
/// to at least `suffix_digits` decimal digits; the full path places that name
/// inside `wd`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Formatter {
    /// Text every output file name begins with.
    pub prefix: String,
    /// Minimum number of decimal digits used for the index after the prefix.
    pub suffix_digits: usize,
    /// Directory the output files are placed in.
    pub wd: std::path::PathBuf,
}

impl Default for Formatter {
    /// Build the default formatter: prefix `"xx"`, two suffix digits, and the
    /// process's current working directory as the output directory.
    ///
    /// `Default::default` cannot report an error, so if the current directory
    /// cannot be determined this falls back to the relative path `"."`
    /// instead of panicking. Use `Formatter::new` to have that failure
    /// reported as an error.
    fn default() -> Self {
        Formatter {
            prefix: "xx".to_string(),
            suffix_digits: 2,
            wd: std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from(".")),
        }
    }
}

impl Formatter {
    /// Create a formatter that places its files in the process's current
    /// working directory.
    ///
    /// # Errors
    ///
    /// Returns the I/O error from `std::env::current_dir` if the current
    /// directory cannot be determined.
    pub fn new(prefix: String, suffix_digits: usize) -> std::io::Result<Self> {
        let wd = std::env::current_dir()?;
        Ok(Formatter {
            prefix,
            suffix_digits,
            wd,
        })
    }

    /// The bare file name of piece `i`: the prefix followed by `i`,
    /// zero-padded to at least `suffix_digits` digits. Indices that need more
    /// digits are not truncated.
    pub fn filename(&self, i: usize) -> String {
        format!("{}{:0width$}", self.prefix, i, width = self.suffix_digits)
    }

    /// The full path of piece `i` inside the working directory, as a string.
    ///
    /// # Panics
    ///
    /// Panics if the joined path is not valid UTF-8.
    pub fn path(&self, i: usize) -> String {
        let full = self.wd.join(self.filename(i));
        full.to_str().unwrap().to_owned()
    }
}

/// The parsed and cleaned arguments for csplit.
///
/// Built from the raw command-line `UserOpts` by `Opt::from_args` and
/// consumed by `csplit`.
pub struct Opt {
    /// The split patterns, as returned by `patterns::build_from_args` for the
    /// pattern operands given on the command line.
    pub patterns: Vec<patterns::Pattern>,
    /// The formatter for the output filenames
    pub formatter: Formatter,
    /// The complete input text to split, read up front from the input file
    /// or from standard input.
    pub text: String,
    /// When `true`, files already written are removed if a later write
    /// fails. This is the inverse of the `-k` flag.
    pub cleanup_after_error: bool,
    /// When `true`, the path and size of every piece are printed to standard
    /// output. This is the inverse of the `-s` flag.
    pub write_file_sizes_to_stdout: bool,
}

impl Opt {
    /// Parse the process's command-line arguments into a ready-to-run `Opt`.
    ///
    /// The raw flags are read through `UserOpts::from_args`, then converted:
    /// the two "do not ..." flags (`-k`, `-s`) are inverted into positive
    /// booleans, the pattern operands are handed to
    /// `patterns::build_from_args`, and the input is read in full.
    ///
    /// # Errors
    ///
    /// Fields are evaluated in the order written below, so the first failure
    /// among these is the one returned:
    ///
    /// 1. the current working directory cannot be determined,
    /// 2. `patterns::build_from_args` rejects the pattern operands,
    /// 3. the input file (or standard input) cannot be read.
    pub fn from_args() -> Result<Self> {
        let opts = UserOpts::from_args();
        Ok(Opt {
            formatter: Formatter::new(opts.prefix, opts.suffix_digits)?,
            patterns: patterns::build_from_args(&opts.patterns)?,
            // `-k` means "keep files", so cleanup is enabled when it is absent.
            cleanup_after_error: !opts.no_remove_on_error_or_signal,
            // `-s` means "silent", so sizes are printed when it is absent.
            write_file_sizes_to_stdout: !opts.no_write_size,
            text: read_input(&opts.file)?,
        })
    }
}
// Raw command-line options, parsed via the structopt derive below and then
// converted into `Opt` by `Opt::from_args`.
//
// NOTE(review): structopt builds the `--help` text from the `///` comments in
// this struct, so they are user-facing output rather than plain documentation.
// Rewording them changes what the program prints; confirm before editing.
#[derive(Debug, structopt::StructOpt, PartialEq, Clone)]
/// UserOpts represents outwards-facing options; that is, the options specified
/// in the man page.
pub struct UserOpts {
    // Becomes `Formatter::prefix`.
    #[structopt(short = "f", long = "prefix", default_value = "xx")]
    /// `-f`, `--prefix`: Give created files names beginning with prefix.  The
    /// default is ``xx''.
    prefix: String,

    // Inverted into `Opt::cleanup_after_error`.
    #[structopt(short = "k")]
    /// <PARTIALLY IMPLEMENTED> `-k`: Do not remove output files if an error
    /// occurs or a HUP, INT or TERM signal is received. This is implemented
    /// for ERRORS but not for signals.
    no_remove_on_error_or_signal: bool,

    // Becomes `Formatter::suffix_digits`.
    #[structopt(short = "n", long = "number", default_value = "2")]
    /// `-n`, `--number`: Use number of decimal digits after the prefix to form
    /// the file name.  The default is 2.
    suffix_digits: usize,

    // Inverted into `Opt::write_file_sizes_to_stdout`.
    #[structopt(short = "s")]
    /// `-s`: Do not write the size of each output file to standard output as it
    /// is created.
    no_write_size: bool,

    // First positional operand; passed to `read_input`, where "-" selects
    // standard input.
    #[structopt(parse(from_os_str))]
    /// the file to split. if `'-'`, default to stdin
    file: OsString,

    // Remaining positional operands; passed to `patterns::build_from_args`.
    #[structopt(parse(from_os_str))]
    ///  `/regexp/[[+|-]offset]`
    ///          Create a file containing the input from the current line to
    /// (but not including) the next line
    ///          matching the given basic regular expression.  An optional
    /// offset from the line that matched may          be specified.
    ///
    ///  `%regexp%[[+|-]offset]`
    ///          Same as above but a file is not created for the output.
    ///
    ///  `line_no`
    ///          Create containing the input from the current line to (but not
    /// including) the specified line          number.
    ///
    ///  `{num}`   Repeat the previous pattern the specified number of times.
    /// If it follows a line number pat-
    ///          tern, a new file will be created for each line_no lines, num
    /// times.  The first line of the file          is line number 1 for
    /// historic reasons.
    patterns: Vec<OsString>,
}