pub mod anti_repeat;
pub mod bed;
pub mod filter;
pub mod merge;
pub mod score;
pub mod sort;
mod sort_core;
pub mod split;
use std::fmt;
use std::io::{self, BufRead, Write};
use std::path::Path;
use chaintools::ChainError;
use clap::{Parser, Subcommand};
use log::LevelFilter;
// Top-level command-line arguments for the `chaintools` binary (clap derive).
// NOTE(review): plain `//` comments are used here on purpose. clap's derive
// reads `///` doc comments as help/about text, so adding them could alter the
// generated `--help` output.
#[derive(Debug, Parser)]
#[command(name = "chaintools")]
#[command(about = "work with .chain files in rust")]
#[command(version = env!("CARGO_PKG_VERSION"))]
#[command(author = env!("CARGO_PKG_AUTHORS"))]
pub struct Cli {
// `-t` / `--threads N`, declared `global` so it is accepted before or after
// the subcommand. Defaults to `num_cpus::get()`; `run` passes the value to
// `configure_threads`.
#[arg(
short = 't',
long,
global = true,
value_name = "N",
help_heading = "Global Options",
default_value_t = num_cpus::get()
)]
threads: usize,
// Optional `--level LEVEL` (also `global`). When absent the field is `None`
// and `resolve_log_level` falls back to `LevelFilter::Info`.
#[arg(
long,
global = true,
value_name = "LEVEL",
help = "Set CLI logging level: off, error, warn, info, debug, trace",
help_heading = "Global Options"
)]
level: Option<LevelFilter>,
// The selected subcommand together with its own arguments.
#[command(subcommand)]
command: Command,
}
// Subcommands of the CLI. Each variant wraps the argument struct declared by
// the module that implements it; `run` forwards to that module's `run`.
// (`//` comments only: `///` doc comments may be picked up by clap's derive as
// help text.)
#[derive(Debug, Subcommand)]
enum Command {
// Exposed as `antirepeat`, with `anti-repeat` accepted as an alias.
#[command(
name = "antirepeat",
alias = "anti-repeat",
about = "Filter chains dominated by repeats or degenerate DNA"
)]
AntiRepeat(anti_repeat::AntiRepeatArgs),
#[command(about = "Convert chain files to BED")]
Bed(bed::BedArgs),
#[command(about = "Filter chain files")]
Filter(filter::FilterArgs),
#[command(about = "Merge chain files")]
Merge(merge::MergeArgs),
#[command(about = "Recompute chain scores from sequence (UCSC chainScore-compatible)")]
Score(score::ScoreArgs),
#[command(about = "Split chain files")]
Split(split::SplitArgs),
#[command(about = "Sort chain files")]
Sort(sort::SortArgs),
}
impl std::fmt::Display for Command {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Command::AntiRepeat(_) => f.write_str("antirepeat"),
Command::Bed(_) => f.write_str("bed"),
Command::Filter(_) => f.write_str("filter"),
Command::Merge(_) => f.write_str("merge"),
Command::Score(_) => f.write_str("score"),
Command::Split(_) => f.write_str("split"),
Command::Sort(_) => f.write_str("sort"),
}
}
}
/// Error type returned by [`run`] and by the helper functions in this module.
#[derive(Debug)]
pub enum CliError {
/// Free-form, already formatted message; displayed verbatim.
Message(String),
/// Wrapped I/O failure; displayed with an `I/O error: ` prefix.
Io(io::Error),
/// Wrapped [`ChainError`]; displayed using the inner error's own formatting.
Chain(ChainError),
}
/// User-facing rendering of a [`CliError`].
///
/// * `Message` prints the stored text verbatim.
/// * `Io` prefixes the underlying error with `I/O error: `.
/// * `Chain` forwards to the wrapped `ChainError`'s `Display` implementation,
///   keeping any formatter flags the caller supplied.
impl fmt::Display for CliError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            CliError::Message(message) => f.write_str(message),
            CliError::Io(err) => write!(f, "I/O error: {err}"),
            // Fully qualified on purpose: only the `fmt` module is imported in
            // this file (no formatting trait), and the wrapped error implements
            // both `Debug` and `Display`, so a bare `err.fmt(f)` method call is
            // either unresolved or ambiguous. Naming `Display` pins the intent.
            CliError::Chain(err) => fmt::Display::fmt(err, f),
        }
    }
}
impl std::error::Error for CliError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
CliError::Io(err) => Some(err),
CliError::Chain(err) => Some(err),
CliError::Message(_) => None,
}
}
}
impl From<io::Error> for CliError {
fn from(value: io::Error) -> Self {
CliError::Io(value)
}
}
impl From<ChainError> for CliError {
fn from(value: ChainError) -> Self {
CliError::Chain(value)
}
}
pub fn run<R, W, E>(cli: Cli, stdin: &mut R, stdout: &mut W, stderr: &mut E) -> Result<(), CliError>
where
R: BufRead,
W: Write,
E: Write,
{
configure_threads(cli.threads)?;
let level = resolve_log_level(cli.level);
configure_logging(level)?;
log::info!(
"chaintools [{}] v{}",
&cli.command,
env!("CARGO_PKG_VERSION")
);
let start = std::time::Instant::now();
let result = match cli.command {
Command::AntiRepeat(args) => anti_repeat::run(args, stdin, stdout, stderr),
Command::Bed(args) => bed::run(args, stdin, stdout, stderr),
Command::Filter(args) => filter::run(args, stdin, stdout, stderr),
Command::Merge(args) => merge::run(args, stdin, stdout, stderr),
Command::Score(args) => score::run(args, stdin, stdout, stderr),
Command::Split(args) => split::run(args, stdin, stdout, stderr),
Command::Sort(args) => sort::run(args, stdin, stdout, stderr),
};
log::info!("Execution time: {:?}", start.elapsed());
result
}
/// Picks the effective log level: the explicitly requested one, or
/// `LevelFilter::Info` when none was requested.
fn resolve_log_level(requested: Option<LevelFilter>) -> LevelFilter {
    match requested {
        Some(level) => level,
        None => LevelFilter::Info,
    }
}
/// Logs a one-line, info-level summary of the form
/// `<tool> complete: label=value, label=value, ...`.
///
/// Nothing is formatted when info-level logging is disabled.
pub(crate) fn log_summary(tool: &str, fields: &[(&str, u64)]) {
    if log::log_enabled!(log::Level::Info) {
        let mut parts = Vec::with_capacity(fields.len());
        for (label, value) in fields {
            parts.push(format!("{label}={value}"));
        }
        let rendered = parts.join(", ");
        log::info!("{tool} complete: {rendered}");
    }
}
/// Verifies that every input path is present on disk.
///
/// All `required` entries are checked first, in order, followed by those
/// `optional` entries that actually carry a path (`None` entries are skipped).
/// Each entry is a `(label, path)` pair; the label is used in the error text.
///
/// # Errors
///
/// Stops at the first path that fails the check and returns its error.
pub(crate) fn ensure_inputs_exist(
    required: &[(&str, &Path)],
    optional: &[(&str, Option<&Path>)],
) -> Result<(), CliError> {
    let mandatory = required.iter().map(|(label, path)| (*label, *path));
    let supplied = optional
        .iter()
        .filter_map(|(label, path)| path.map(|present| (*label, present)));

    mandatory
        .chain(supplied)
        .try_for_each(|(label, path)| check_input_exists(label, path))
}
/// Checks a single input path, using `label` to name it in error messages.
///
/// # Errors
///
/// Returns `CliError::Message` when the path does not exist, or when its
/// existence cannot be determined (the underlying error is included in the text).
fn check_input_exists(label: &str, path: &Path) -> Result<(), CliError> {
    let problem = match path.try_exists() {
        Ok(true) => return Ok(()),
        Ok(false) => format!("{label} file does not exist: {}", path.display()),
        Err(err) => format!("cannot access {label} file {}: {err}", path.display()),
    };
    Err(CliError::Message(problem))
}
/// Applies `level` as the `log` crate's maximum level and, unless logging is
/// switched off, installs a `simple_logger::SimpleLogger` at that level.
///
/// # Errors
///
/// Returns `CliError::Message` if the logger cannot be initialised.
fn configure_logging(level: LevelFilter) -> Result<(), CliError> {
    log::set_max_level(level);

    if level != LevelFilter::Off {
        let logger = simple_logger::SimpleLogger::new().with_level(level);
        if let Err(err) = logger.init() {
            return Err(CliError::Message(format!(
                "failed to initialize logger: {err}"
            )));
        }
        // Re-assert the requested ceiling after the logger has been installed.
        log::set_max_level(level);
    }

    Ok(())
}
/// Installs rayon's global thread pool with `threads` worker threads.
///
/// `ThreadPoolBuilder::build_global` is used rather than `build`: `build` only
/// returns a standalone pool, which would be dropped at the end of this
/// function and leave the global pool (the one parallel iterators use by
/// default) at its default size, so `--threads` would have no effect.
///
/// # Errors
///
/// Returns `CliError::Message` if rayon refuses the configuration.
/// NOTE(review): per rayon's documentation the global pool can only be
/// initialised once per process, so a second call is expected to fail — confirm
/// no caller invokes `run` repeatedly in the same process.
#[cfg(feature = "parallel")]
fn configure_threads(threads: usize) -> Result<(), CliError> {
    rayon::ThreadPoolBuilder::new()
        .num_threads(threads)
        .build_global()
        .map_err(|err| {
            CliError::Message(format!("failed to configure global thread pool: {err}"))
        })
}
/// Argument-parsing tests for the global options and log-level resolution.
#[cfg(test)]
mod tests {
    use super::*;
    use clap::Parser;

    /// Runs the clap parser over `argv` (program name included).
    fn parse(argv: &[&str]) -> Result<Cli, clap::Error> {
        Cli::try_parse_from(argv.iter().copied())
    }

    /// Parses `argv`, which must be valid, and resolves the effective log level.
    fn effective_level(argv: &[&str]) -> LevelFilter {
        let cli = parse(argv).expect("args parse");
        resolve_log_level(cli.level)
    }

    #[test]
    fn parses_reserved_globals_before_subcommand() {
        let argv = [
            "chaintools",
            "--threads",
            "4",
            "--level",
            "debug",
            "filter",
            "--chain",
            "input.chain",
        ];
        let Cli {
            threads,
            level,
            command,
        } = parse(&argv).expect("global options should parse");

        assert_eq!(threads, 4);
        assert_eq!(level, Some(LevelFilter::Debug));
        assert!(matches!(command, Command::Filter(_)));
    }

    #[test]
    fn parses_reserved_globals_after_subcommand() {
        let argv = [
            "chaintools",
            "filter",
            "--threads",
            "2",
            "--level",
            "trace",
            "--chain",
            "input.chain",
        ];
        let Cli {
            threads,
            level,
            command,
        } = parse(&argv).expect("global options should parse after subcommand");

        assert_eq!(threads, 2);
        assert_eq!(level, Some(LevelFilter::Trace));
        assert!(matches!(command, Command::Filter(_)));
    }

    #[test]
    fn bed_long_short_parses_as_subcommand_flag() {
        let parsed = parse(&["chaintools", "bed", "-L", "--side", "reference"])
            .expect("bed long-format short flag should parse after subcommand");

        assert_eq!(parsed.level, None);
        assert!(matches!(parsed.command, Command::Bed(_)));
    }

    #[test]
    fn rejects_unknown_log_level() {
        let failure = parse(&["chaintools", "--level", "chatty", "filter"])
            .expect_err("unknown log level should be rejected");

        assert_eq!(failure.kind(), clap::error::ErrorKind::ValueValidation);
    }

    #[test]
    fn defaults_to_info_when_unspecified() {
        assert_eq!(effective_level(&["chaintools", "filter"]), LevelFilter::Info);
    }

    #[test]
    fn honors_requested_level() {
        assert_eq!(
            effective_level(&["chaintools", "--level", "debug", "filter"]),
            LevelFilter::Debug
        );
    }

    #[test]
    fn explicit_off_silences_logging() {
        assert_eq!(
            effective_level(&["chaintools", "--level", "off", "filter"]),
            LevelFilter::Off
        );
    }

    #[test]
    fn stdout_output_still_defaults_to_info() {
        let argv = [
            "chaintools",
            "anti-repeat",
            "--reference",
            "target.2bit",
            "--query",
            "query.2bit",
        ];
        assert_eq!(effective_level(&argv), LevelFilter::Info);
    }

    #[test]
    fn stdout_output_honors_requested_level() {
        assert_eq!(
            effective_level(&["chaintools", "--level", "info", "sort"]),
            LevelFilter::Info
        );
    }

    #[test]
    fn out_chain_output_defaults_to_info() {
        assert_eq!(
            effective_level(&["chaintools", "filter", "--out-chain", "out.chain"]),
            LevelFilter::Info
        );
    }
}
/// Thread configuration for builds without the `parallel` feature: only a
/// single thread is accepted.
///
/// # Errors
///
/// Returns `CliError::Message` for any `threads` value other than `1`.
///
/// NOTE(review): `Cli::threads` defaults to `num_cpus::get()`, so on a
/// multi-core host this build rejects every invocation that does not pass
/// `--threads 1` explicitly — confirm that this is intended.
#[cfg(not(feature = "parallel"))]
fn configure_threads(threads: usize) -> Result<(), CliError> {
    match threads {
        1 => Ok(()),
        _ => Err(CliError::Message(String::from(
            "--threads requires chaintools to be built with the `parallel` feature",
        ))),
    }
}