use clap::{Arg, Command};
use orphos_core::config::{OrphosConfig, OutputFormat};
use orphos_core::output::write_results;
use orphos_core::*;
use std::fs::File;
use std::io::{self, BufWriter, Write};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let matches = Command::new("orphos")
.version(env!("CARGO_PKG_VERSION"))
.about("Prokaryotic gene finding algorithm")
.arg(
Arg::new("input")
.short('i')
.long("input")
.value_name("FILE")
.help("Input FASTA file (default: stdin)"),
)
.arg(
Arg::new("output")
.short('o')
.long("output")
.value_name("FILE")
.help("Output file (default: stdout)"),
)
.arg(
Arg::new("format")
.short('f')
.long("format")
.value_name("FORMAT")
.help("Output format: gbk, gff, sco, gca, bed")
.default_value("gbk"),
)
.arg(
Arg::new("mode")
.short('p')
.long("mode")
.value_name("MODE")
.help("Analysis mode: single or meta")
.default_value("single"),
)
.arg(
Arg::new("closed")
.short('c')
.long("closed")
.help("Closed ends (no genes off edges)"),
)
.arg(
Arg::new("circular")
.short('r')
.long("circular")
.help("Circular topology (allow wraparound genes)"),
)
.arg(
Arg::new("mask")
.short('m')
.long("mask")
.help("Mask runs of N's"),
)
.arg(
Arg::new("quiet")
.short('q')
.long("quiet")
.help("Quiet mode"),
)
.arg(
Arg::new("training")
.short('t')
.long("training")
.value_name("FILE")
.help("Training file"),
)
.arg(
Arg::new("translation-table")
.short('g')
.long("translation-table")
.value_name("TABLE")
.help("Translation table (1-25)"),
)
.get_matches();
let mut options = OrphosConfig {
metagenomic: matches.get_one::<String>("mode").map(|s| s.as_str()) == Some("meta"),
closed_ends: matches.contains_id("closed"),
circular: matches.contains_id("circular"),
mask_n_runs: matches.contains_id("mask"),
quiet: matches.contains_id("quiet"),
..Default::default()
};
if options.circular && options.closed_ends {
return Err("Cannot use --circular together with --closed".into());
}
if let Some(tt_str) = matches.get_one::<String>("translation-table") {
let tt: u8 = tt_str
.parse()
.map_err(|_| "Invalid translation table number")?;
if !(1..=25).contains(&tt) || tt == 7 || tt == 8 || (17..=20).contains(&tt) {
return Err("Invalid translation table specified".into());
}
options.translation_table = Some(tt);
}
options.output_format = match matches.get_one::<String>("format").unwrap().as_str() {
"gbk" | "genbank" => OutputFormat::Genbank,
"gff" => OutputFormat::Gff,
"sco" => OutputFormat::Sco,
"gca" => OutputFormat::Gca,
"bed" => OutputFormat::Bed,
_ => return Err("Invalid output format".into()),
};
let mut orphos = OrphosAnalyzer::new(options);
let results = if let Some(input_file) = matches.get_one::<String>("input") {
orphos.analyze_fasta_file(input_file)?
} else {
unimplemented!("Reading from stdin is not yet implemented");
};
let mut writer: Box<dyn Write> = if let Some(output_file) = matches.get_one::<String>("output")
{
Box::new(BufWriter::new(File::create(output_file)?))
} else {
Box::new(BufWriter::new(io::stdout()))
};
for result in &results {
write_results(&mut writer, result, orphos.config.output_format)?;
}
if !matches.contains_id("quiet") {
eprintln!(
"Analysis complete! Found {} genes in {} sequences.",
results.iter().map(|r| r.genes.len()).sum::<usize>(),
results.len()
);
let total_dicodons: u32 = results.iter().map(|r| r.training_used.total_dicodons).sum();
if total_dicodons > 0 {
eprintln!("Total dicodons counted: {}", total_dicodons);
}
}
Ok(())
}