use std::path::PathBuf;
use clap::{Parser, Subcommand};
use rsomics_common::{CommonFlags, Result, RsomicsError, Tool, ToolMeta};
use rsomics_help::{Example, HelpSpec, Origin};
use rsomics_fastq_utils::ops;
/// Identity of this tool as returned by `Tool::meta`.
///
/// Both fields are filled in by Cargo at compile time from the package manifest.
pub const META: ToolMeta = ToolMeta {
    version: env!("CARGO_PKG_VERSION"),
    name: env!("CARGO_PKG_NAME"),
};
// Top-level command-line definition for the `rsomics-fastq-utils` binary.
// clap derives the argument parser from this struct. Plain `//` comments are
// used on purpose: `///` doc comments would be picked up by the derive macro
// and change the generated help text.
// The automatic `-h/--help` flag is disabled below.
// NOTE(review): presumably help is rendered from `HELP` through rsomics_help
// instead — confirm against the `Tool` runner.
#[derive(Parser)]
#[command(
name = "rsomics-fastq-utils",
version,
about = "FASTQ utility toolkit",
disable_help_flag = true
)]
pub struct Cli {
// The subcommand chosen on the command line; consumed by `Tool::execute`.
#[command(subcommand)]
command: Command,
// `CommonFlags` from rsomics_common, flattened into this parser and exposed
// through `Tool::common`. Its individual flags are not visible in this file.
#[command(flatten)]
pub common: CommonFlags,
}
// Subcommands of the toolkit. `Tool::execute` dispatches every variant to one
// function under `rsomics_fastq_utils::ops`.
// Unless noted otherwise, `input` is a positional path and `output` is
// `-o/--output` with default `-`, which `open_output` maps to stdout.
// Plain `//` comments are used on purpose: `///` doc comments here would be
// picked up by clap and change the generated help text.
#[derive(Subcommand)]
enum Command {
// Sums `ops::count::count` over all given paths and prints the total.
// NOTE(review): under clap's derive rules a `Vec` positional may be empty, in
// which case this prints 0 — confirm that is intended.
Count { input: Vec<PathBuf> },
// Passes `input` to `ops::deinterleave::deinterleave` together with two
// output files. `--out1` and `--out2` have no default and no stdout form.
Deinterleave {
input: PathBuf,
#[arg(long)]
out1: PathBuf,
#[arg(long)]
out2: PathBuf,
},
// Dispatches to `ops::extract::extract_fastq` with a list file (`-l/--list`)
// and the `--exclude` switch.
// NOTE(review): presumably the list holds record names to keep, or to drop
// when `--exclude` is set — confirm in `ops::extract`.
Extract {
input: PathBuf,
#[arg(short = 'l', long)]
list: PathBuf,
#[arg(long)]
exclude: bool,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::gc::fastq_gc`.
Gc {
input: PathBuf,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::grep::grep` with `-p/--pattern` and `--invert-match`.
// NOTE(review): the pattern syntax and what it is matched against are
// decided in `ops::grep`, not here.
Grep {
input: PathBuf,
#[arg(short = 'p', long)]
pattern: String,
#[arg(long)]
invert_match: bool,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::head::head`; `-n/--num` defaults to 10.
Head {
input: PathBuf,
#[arg(short = 'n', long, default_value_t = 10)]
num: u64,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::interleave::interleave`. Takes two named inputs
// (`-i/--in1`, `-I/--in2`) instead of a positional path.
Interleave {
#[arg(short = 'i', long)]
in1: PathBuf,
#[arg(short = 'I', long)]
in2: PathBuf,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::len::lengths`; `--tab` is forwarded as a boolean.
Len {
input: PathBuf,
#[arg(long)]
tab: bool,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::rename::rename`; `--prefix` defaults to `read_`.
Rename {
input: PathBuf,
#[arg(long, default_value = "read_")]
prefix: String,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::revcomp::revcomp`.
Revcomp {
input: PathBuf,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::sample::sample`; `-p/--proportion` defaults to 0.1 and
// `--seed` to 42.
// NOTE(review): no range check on `proportion` is visible in this file.
Sample {
input: PathBuf,
#[arg(short = 'p', long, default_value_t = 0.1)]
proportion: f64,
#[arg(long, default_value_t = 42)]
seed: u64,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::shuffle::shuffle_fastq`; `--seed` defaults to 42.
Shuffle {
input: PathBuf,
#[arg(long, default_value_t = 42)]
seed: u64,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::sort::sort`. `Tool::execute` picks the key:
// `-L/--by-length-desc` selects `SortKey::LengthDesc`, otherwise
// `-l/--by-length` selects `SortKey::Length`, otherwise `SortKey::Name`.
// The two flags are not declared as conflicting, so `-L` wins if both are set.
Sort {
input: PathBuf,
#[arg(short = 'l', long)]
by_length: bool,
#[arg(short = 'L', long)]
by_length_desc: bool,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::tab::fastq_to_tab`.
Tab {
input: PathBuf,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::to_fasta::convert`.
ToFasta {
input: PathBuf,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
// Dispatches to `ops::window::fastq_window`; `-w/--window` defaults to 10.
// NOTE(review): a window of 0 is accepted by the parser — confirm that
// `ops::window` handles it.
Window {
input: PathBuf,
#[arg(short = 'w', long, default_value_t = 10)]
window: usize,
#[arg(short = 'o', long, default_value = "-")]
output: String,
},
}
/// Opens the destination for a subcommand's output.
///
/// The literal path `-` selects locked standard output; any other value is
/// created (or truncated) as a regular file.
///
/// # Errors
///
/// Returns `RsomicsError::Io` when the file cannot be created.
fn open_output(path: &str) -> Result<Box<dyn std::io::Write>> {
    let sink: Box<dyn std::io::Write> = match path {
        "-" => Box::new(std::io::stdout().lock()),
        file => Box::new(std::fs::File::create(file).map_err(RsomicsError::Io)?),
    };
    Ok(sink)
}
impl Tool for Cli {
    /// Returns the static name/version metadata of this tool.
    fn meta() -> ToolMeta {
        META
    }

    /// Borrows the parsed common flags.
    fn common(&self) -> &CommonFlags {
        &self.common
    }

    /// Runs the selected subcommand by forwarding its arguments to the matching
    /// function in `rsomics_fastq_utils::ops`.
    ///
    /// Every output sink is flushed explicitly before the arm returns. Dropping
    /// a buffered writer also flushes, but discards any error, which would let
    /// a truncated output file be reported as success.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the invoked operation, and returns
    /// `RsomicsError::Io` when an output cannot be created, written or flushed.
    #[allow(clippy::too_many_lines)] // one match arm per subcommand
    fn execute(self) -> Result<()> {
        use std::io::Write as _;

        /// Flushes `sink`, reporting a failure as `RsomicsError::Io`.
        fn finish<W: std::io::Write>(mut sink: W) -> Result<()> {
            std::io::Write::flush(&mut sink).map_err(RsomicsError::Io)
        }

        match self.command {
            Command::Count { input } => {
                let mut total = 0u64;
                for path in &input {
                    total += ops::count::count(path)?;
                }
                // `println!` panics when stdout is closed; report it as an error instead.
                let mut stdout = std::io::stdout().lock();
                writeln!(stdout, "{total}").map_err(RsomicsError::Io)?;
                finish(&mut stdout)?;
            }
            Command::Deinterleave { input, out1, out2 } => {
                let mut w1 = std::io::BufWriter::new(
                    std::fs::File::create(&out1).map_err(RsomicsError::Io)?,
                );
                let mut w2 = std::io::BufWriter::new(
                    std::fs::File::create(&out2).map_err(RsomicsError::Io)?,
                );
                ops::deinterleave::deinterleave(&input, &mut w1, &mut w2)?;
                // Surface errors from writing the buffered tail of each file.
                finish(&mut w1)?;
                finish(&mut w2)?;
            }
            Command::Extract {
                input,
                list,
                exclude,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::extract::extract_fastq(&input, &list, &mut out, exclude)?;
                finish(&mut out)?;
            }
            Command::Gc { input, output } => {
                let mut out = open_output(&output)?;
                ops::gc::fastq_gc(&input, &mut out)?;
                finish(&mut out)?;
            }
            Command::Grep {
                input,
                pattern,
                invert_match,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::grep::grep(&input, &pattern, invert_match, &mut out)?;
                finish(&mut out)?;
            }
            Command::Head { input, num, output } => {
                let mut out = open_output(&output)?;
                ops::head::head(&input, num, &mut out)?;
                finish(&mut out)?;
            }
            Command::Interleave { in1, in2, output } => {
                let mut out = open_output(&output)?;
                ops::interleave::interleave(&in1, &in2, &mut out)?;
                finish(&mut out)?;
            }
            Command::Len { input, tab, output } => {
                let mut out = open_output(&output)?;
                ops::len::lengths(&input, tab, &mut out)?;
                finish(&mut out)?;
            }
            Command::Rename {
                input,
                prefix,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::rename::rename(&input, &prefix, &mut out)?;
                finish(&mut out)?;
            }
            Command::Revcomp { input, output } => {
                let mut out = open_output(&output)?;
                ops::revcomp::revcomp(&input, &mut out)?;
                finish(&mut out)?;
            }
            Command::Sample {
                input,
                proportion,
                seed,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::sample::sample(&input, proportion, seed, &mut out)?;
                finish(&mut out)?;
            }
            Command::Shuffle {
                input,
                seed,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::shuffle::shuffle_fastq(&input, &mut out, seed)?;
                finish(&mut out)?;
            }
            Command::Sort {
                input,
                by_length,
                by_length_desc,
                output,
            } => {
                // Descending length takes precedence when both flags are set;
                // with neither flag the records are sorted by name.
                let key = if by_length_desc {
                    ops::sort::SortKey::LengthDesc
                } else if by_length {
                    ops::sort::SortKey::Length
                } else {
                    ops::sort::SortKey::Name
                };
                let mut out = open_output(&output)?;
                ops::sort::sort(&input, key, &mut out)?;
                finish(&mut out)?;
            }
            Command::Tab { input, output } => {
                let mut out = open_output(&output)?;
                ops::tab::fastq_to_tab(&input, &mut out)?;
                finish(&mut out)?;
            }
            Command::ToFasta { input, output } => {
                let mut out = open_output(&output)?;
                ops::to_fasta::convert(&input, &mut out)?;
                finish(&mut out)?;
            }
            Command::Window {
                input,
                window,
                output,
            } => {
                let mut out = open_output(&output)?;
                ops::window::fastq_window(&input, &mut out, window)?;
                finish(&mut out)?;
            }
        }
        Ok(())
    }
}
/// Static help description for this tool: identity, provenance, usage line and
/// a few example invocations, in the format defined by `rsomics_help`.
pub static HELP: HelpSpec = HelpSpec {
    version: env!("CARGO_PKG_VERSION"),
    name: env!("CARGO_PKG_NAME"),
    tagline: "FASTQ utility toolkit — lightweight subcommands for counting, filtering, converting, and inspecting FASTQ files.",
    usage_lines: &["<COMMAND> [OPTIONS] <input>"],
    origin: Some(Origin {
        paper_doi: None,
        our_license: "MIT OR Apache-2.0",
        upstream_license: "MIT",
        upstream: "seqkit / seqtk",
    }),
    examples: &[
        Example {
            command: "rsomics-fastq-utils count reads.fq.gz",
            description: "Count records",
        },
        Example {
            command: "rsomics-fastq-utils head -n 100 reads.fq",
            description: "First 100 reads",
        },
        Example {
            command: "rsomics-fastq-utils to-fasta reads.fq -o reads.fa",
            description: "Convert to FASTA",
        },
    ],
    sections: &[],
    json_result_schema_doc: None,
};
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Builds the derived clap command and runs clap's own consistency checks
    /// on it.
    #[test]
    fn cli_debug_assert() {
        let cmd = <Cli as CommandFactory>::command();
        cmd.debug_assert();
    }
}