use clap::*;
use std::collections::HashMap;
use std::io::BufRead;
/// Builds the clap definition of the `select` subcommand.
///
/// Arguments:
/// * `infiles` (positional, one or more, required) - input files.
/// * `-H/--header` - treat the first line of each file as a header.
/// * `-s/--sharp` - keep lines starting with `#` instead of dropping them.
/// * `-f/--fields` - the fields to output, in the order listed.
/// * `-o/--outfile` - output filename, defaults to `stdout`.
pub fn make_subcommand() -> Command {
    Command::new("select")
        .about("Select fields in the order listed")
        .after_help(
            r###"
* Fields can be specified by field number or field name.
* Field names must not be specified as a valid IntSpan runlist.
For example, avoid using formats like `1`, `2-6`, or `-`.
Examples:
# Selects fields 6 and 1 from the input file, treating the first line as a header
rgr select tests/rgr/ctg.tsv -H -f 6,1
# Selects fields `ID` and `length` by names
rgr select tests/rgr/ctg.tsv -H -f ID,length
"###,
        )
        .arg(
            Arg::new("infiles")
                .required(true)
                .num_args(1..)
                .index(1)
                // `num_args(1..)` accepts several files, so say so.
                .help("Input file(s) to process"),
        )
        .arg(
            Arg::new("header")
                .long("header")
                .short('H')
                .action(ArgAction::SetTrue)
                .help("Treat the first line of each file as a header"),
        )
        .arg(
            Arg::new("sharp")
                .long("sharp")
                .short('s')
                .action(ArgAction::SetTrue)
                .help("Preserve lines starting with a `#` without changes. The default is to ignore them"),
        )
        .arg(
            Arg::new("fields")
                .long("fields")
                .short('f')
                .num_args(1)
                // This command only reorders/filters columns; it generates no range field.
                .help("Fields to output, by number or (with -H) by name, in the order listed"),
        )
        .arg(
            Arg::new("outfile")
                .long("outfile")
                .short('o')
                .num_args(1)
                .default_value("stdout")
                .help("Output filename. [stdout] for screen"),
        )
}
/// Builds one output line from the tab-split `parts` of an input line.
///
/// `fields` holds 1-based column indices, emitted in the order given. An empty
/// `fields` means "no selection": the parts are re-joined unchanged.
///
/// # Errors
/// Returns an error when an index is 0 or exceeds the number of parts,
/// instead of panicking on the lookup.
fn select_fields(parts: &[&str], fields: &[usize]) -> anyhow::Result<String> {
    if fields.is_empty() {
        return Ok(parts.join("\t"));
    }

    let mut selected: Vec<&str> = Vec::with_capacity(fields.len());
    for &idx in fields {
        // `checked_sub` guards the 1-based -> 0-based conversion against index 0.
        let part = idx
            .checked_sub(1)
            .and_then(|pos| parts.get(pos).copied())
            .ok_or_else(|| {
                anyhow::anyhow!(
                    "field {} does not exist; the line has {} field(s)",
                    idx,
                    parts.len()
                )
            })?;
        selected.push(part);
    }
    Ok(selected.join("\t"))
}

/// Runs the `select` subcommand: writes the requested fields of every input
/// line, in the order listed, to the chosen output.
///
/// * With `--header`, the first line of each file is the header; `--fields`
///   is resolved against its column names and the header itself is written
///   with the same selection applied.
/// * Without `--header`, `--fields` is resolved once as numeric indices.
/// * Without `--fields`, lines are written unchanged.
/// * Lines starting with `#` (other than a header line) are written verbatim
///   when `--sharp` is set and dropped otherwise.
///
/// # Errors
/// Returns an error when a line cannot be read, when the field specification
/// cannot be resolved against the header, when a selected field does not
/// exist in a line, or when writing/flushing the output fails.
pub fn execute(args: &ArgMatches) -> anyhow::Result<()> {
    let mut writer = intspan::writer(
        args.get_one::<String>("outfile")
            .expect("`outfile` has a default value"),
    );
    let is_header = args.get_flag("header");
    let is_sharp = args.get_flag("sharp");
    let opt_fields = args.get_one::<String>("fields");

    // Numeric specs do not depend on file content, so resolve them once up
    // front. With a header the spec may contain names and is resolved per file.
    let positional: Vec<usize> = match opt_fields {
        Some(spec) if !is_header => intspan::ints_to_idx(spec),
        _ => Vec::new(),
    };

    for infile in args
        .get_many::<String>("infiles")
        .expect("`infiles` is a required argument")
    {
        let reader = intspan::reader(infile);
        let mut fields: Vec<usize> = positional.clone();

        for (i, line) in reader.lines().enumerate() {
            // Surface read errors instead of silently truncating the output.
            let line = line.map_err(|e| {
                anyhow::anyhow!("{}: failed to read line {}: {}", infile, i + 1, e)
            })?;
            let parts: Vec<&str> = line.split('\t').collect();

            if is_header && i == 0 {
                if let Some(spec) = opt_fields {
                    // Map each column name to its 1-based position.
                    let idx_of: HashMap<String, usize> = parts
                        .iter()
                        .enumerate()
                        .map(|(pos, name)| (name.to_string(), pos + 1))
                        .collect();
                    fields = intspan::named_field_to_idx(spec, &idx_of).map_err(|e| {
                        anyhow::anyhow!(
                            "{}: cannot resolve fields `{}`: {:?}",
                            infile,
                            spec,
                            e
                        )
                    })?;
                }
                // The header line falls through to the selection below, even
                // if it starts with `#`.
            } else if line.starts_with('#') {
                if is_sharp {
                    writeln!(writer, "{}", line)?;
                }
                continue;
            }

            let out = select_fields(&parts, &fields)
                .map_err(|e| e.context(format!("{}: line {}", infile, i + 1)))?;
            writeln!(writer, "{}", out)?;
        }
    }

    // Flush explicitly: errors raised while flushing on drop would be lost.
    writer.flush()?;
    Ok(())
}