qsv 0.87.0

A high performance CSV data-wrangling toolkit.
static USAGE: &str = r#"
Add a new column enumerating the lines of a CSV file. This can be useful to keep
track of a specific line order, give a unique identifier to each line or even
make a copy of the contents of a column.

The enum function can currently be used to perform the following tasks:

  Add an incremental identifier to each of the lines:
    $ qsv enum file.csv

  Add a uuid v4 to each of the lines:
    $ qsv enum --uuid file.csv

  Create a new column filled with a given value:
    $ qsv enum --constant 0

  Copy the contents of a column to a new one:
    $ qsv enum --copy names

  Finally, note that you should also be able to shuffle the lines of a CSV file
  by sorting on the generated uuids:
    $ qsv enum uuid file.csv | qsv sort -s uuid > shuffled.csv

Usage:
    qsv enum [options] [<input>]
    qsv enum --help

enum options:
    -c, --new-column <name>  Name of the column to create.
                             Will default to "index".
    --constant <value>       Fill a new column with the given value.
                             Changes the default column name to "constant".
                             To specify a null value, pass the literal "<NULL>".
    --copy <column>          Name of a column to copy.
                             Changes the default column name to "{column}_copy".
    --uuid                   When set, the column will be populated with
                             uuids (v4) instead of the incremental identifier.
                             Changes the default column name to "uuid".

Common options:
    -h, --help               Display this message
    -o, --output <file>      Write output to <file> instead of stdout.
    -n, --no-headers         When set, the first row will not be interpreted
                             as headers.
    -d, --delimiter <arg>    The field delimiter for reading CSV data.
                             Must be a single character. (default: ,)
"#;

use serde::Deserialize;
use uuid::Uuid;

use crate::{
    config::{Config, Delimiter},
    select::SelectColumns,
    util, CliResult,
};

const NULL_VALUE: &str = "<NULL>";

#[derive(Deserialize)]
struct Args {
    arg_input:       Option<String>,
    flag_new_column: Option<String>,
    flag_constant:   Option<String>,
    flag_copy:       Option<SelectColumns>,
    flag_uuid:       bool,
    flag_output:     Option<String>,
    flag_no_headers: bool,
    flag_delimiter:  Option<Delimiter>,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;
    let mut rconfig = Config::new(&args.arg_input)
        .delimiter(args.flag_delimiter)
        .checkutf8(false)
        .no_headers(args.flag_no_headers);

    let mut rdr = rconfig.reader()?;
    let mut wtr = Config::new(&args.flag_output).writer()?;

    let mut headers = rdr.byte_headers()?.clone();

    let mut copy_index = 0;
    let mut copy_operation = false;

    if let Some(column_name) = args.flag_copy {
        rconfig = rconfig.select(column_name);
        let sel = rconfig.selection(&headers)?;
        copy_index = *sel.iter().next().unwrap();
        copy_operation = true;
    }

    if !rconfig.no_headers {
        if let Some(column_name) = &args.flag_new_column {
            headers.push_field(column_name.as_bytes());
        } else if args.flag_uuid {
            headers.push_field(b"uuid");
        } else if args.flag_constant.is_some() {
            headers.push_field(b"constant");
        } else if copy_operation {
            let current_header = match String::from_utf8(headers[copy_index].to_vec()) {
                Ok(s) => s,
                Err(e) => return fail_clierror!("Could not parse cell as utf-8!: {e}"),
            };
            headers.push_field(format!("{current_header}_copy").as_bytes());
        } else {
            headers.push_field(b"index");
        };

        wtr.write_record(&headers)?;
    }

    let mut record = csv::ByteRecord::new();
    let mut counter: u64 = 0;

    while rdr.read_byte_record(&mut record)? {
        if let Some(constant_value) = &args.flag_constant {
            if constant_value == NULL_VALUE {
                record.push_field(b"");
            } else {
                record.push_field(constant_value.as_bytes());
            }
        } else if copy_operation {
            #[allow(clippy::unnecessary_to_owned)]
            record.push_field(&record[copy_index].to_vec());
        } else if args.flag_uuid {
            let id = Uuid::new_v4();
            record.push_field(
                id.as_hyphenated()
                    .encode_lower(&mut Uuid::encode_buffer())
                    .as_bytes(),
            );
        } else {
            record.push_field(counter.to_string().as_bytes());
            counter += 1;
        }
        wtr.write_byte_record(&record)?;
    }
    Ok(wtr.flush()?)
}