use clap::*;
use intspan::*;
use std::collections::BTreeMap;
use std::io::BufRead;
/// Builds the clap definition of the `sort` subcommand.
///
/// Arguments, in the order they are registered:
/// * `infiles` - required positional argument at index 1, at least one value.
/// * `--header` / `-H` - flag that takes no value.
/// * `--field` / `-f` - option that takes a value.
/// * `--outfile` / `-o` - option that takes a value, defaults to `stdout` and rejects
///   empty values.
pub fn make_subcommand<'a>() -> Command<'a> {
    // Free-form text attached to the command via `after_help`.
    let epilogue = r###"
* If no part is a valid range, the line will be written to the last
Example:
rgr sort tests/rgr/S288c.rg
rgr sort tests/rgr/ctg.range.tsv
rgr sort tests/rgr/ctg.range.tsv -H -f 3
"###;

    let infiles = Arg::new("infiles")
        .help("Set the input file to use")
        .required(true)
        .min_values(1)
        .index(1);

    let header = Arg::new("header")
        .long("header")
        .short('H')
        .takes_value(false)
        .help("Treat the first line of each file as a header");

    let field = Arg::new("field")
        .long("field")
        .short('f')
        .takes_value(true)
        .help("Set the index of the range field. When not set, the first valid range will be used");

    let outfile = Arg::new("outfile")
        .short('o')
        .long("outfile")
        .takes_value(true)
        .default_value("stdout")
        .forbid_empty_values(true)
        .help("Output filename. [stdout] for screen");

    // Registration order is kept identical to the original builder chain.
    Command::new("sort")
        .about("Sort .rg and .tsv files by a range field")
        .after_help(epilogue)
        .arg(infiles)
        .arg(header)
        .arg(field)
        .arg(outfile)
}
/// Runs the `sort` subcommand.
///
/// Every input file is read line by line and each line is split on tabs. A line is
/// associated with a range as follows:
///
/// * `--field N` with `N >= 1`: only the `N`-th (1-based) column is parsed as a range.
/// * `--field` absent (or `0`): the first column that parses to a valid range is used.
///
/// Lines with a valid range are written sorted by `chr()`, then `start()`, then `strand()`
/// of that range. Lines without a valid range, including lines that have fewer columns
/// than `--field`, are written afterwards in the order they were read. Because valid
/// lines are keyed by their full text, identical valid lines are emitted only once.
///
/// With `--header`, the first line of each input file is written to the output
/// immediately and takes no part in sorting.
///
/// # Errors
///
/// Returns an error when reading an input line or writing to the output fails.
/// A non-integer `--field` value prints a message to stderr and exits the process
/// with status 1.
pub fn execute(args: &ArgMatches) -> std::result::Result<(), Box<dyn std::error::Error>> {
    // `outfile` carries a default value, so it is always present.
    let mut writer = writer(args.value_of("outfile").unwrap());

    let is_header = args.is_present("header");
    // 0 means "no explicit field": scan the columns for the first valid range.
    let idx_range: usize = if args.is_present("field") {
        args.value_of_t("field").unwrap_or_else(|e| {
            eprintln!("Need an integer for --field\n{}", e);
            std::process::exit(1)
        })
    } else {
        0
    };

    // Valid lines keyed by their text; invalid lines keep their input order.
    let mut line_map: BTreeMap<String, Range> = BTreeMap::new();
    let mut invalids: Vec<String> = vec![];

    // `infiles` is a required argument, so it is always present.
    for infile in args.values_of("infiles").unwrap() {
        let input = reader(infile);

        for (i, line) in input.lines().enumerate() {
            // Propagate read errors instead of silently dropping the affected lines.
            let line = line?;

            if is_header && i == 0 {
                writeln!(writer, "{}", line)?;
                continue;
            }

            let range = if idx_range == 0 {
                line.split('\t')
                    .map(|part| Range::from_str(part))
                    .find(|range| range.is_valid())
            } else {
                // A line with too few columns yields `None` here rather than panicking.
                line.split('\t')
                    .nth(idx_range - 1)
                    .map(|part| Range::from_str(part))
                    .filter(|range| range.is_valid())
            };

            match range {
                Some(range) => {
                    line_map.insert(line, range);
                }
                None => invalids.push(line),
            }
        }
    }

    let mut valids: Vec<String> = line_map.keys().cloned().collect();
    {
        // Stable sorts applied from the least to the most significant key, so the final
        // order is by chr, then start, then strand.
        valids.sort_by_cached_key(|k| line_map.get(k).unwrap().strand());
        valids.sort_by_cached_key(|k| line_map.get(k).unwrap().start());
        valids.sort_by_cached_key(|k| line_map.get(k).unwrap().chr());
    }

    for line in valids.iter().chain(invalids.iter()) {
        writeln!(writer, "{}", line)?;
    }

    Ok(())
}