1use std::ops::RangeInclusive;
2
3use crate::{
4 align_motifs_multi_threaded,
5 alignment::align_motifs_distance,
6 generate_consensus_string, load_data, run_gibbs_sampler, run_median_string,
7 run_randomized_motif_search, unique_motifs,
8 utils::{
9 create_output_file, generate_vector_space_delimited, output_results_to_file,
10 write_file_header,
11 },
12 Error,
13};
14use chrono::Utc;
15use clap::{Args, Parser, Subcommand};
16use clap_verbosity_flag::InfoLevel;
17use rayon::prelude::*;
18use tracing::{error, info, trace};
19#[derive(Debug, Parser)]
21#[command(author, version, about, long_about = None)]
22pub struct MotifFinder {
23 #[clap(flatten)]
24 global_opts: GlobalOpts,
25 #[command(subcommand)]
26 pub command: Commands,
27 #[clap(flatten)]
28 pub verbose: clap_verbosity_flag::Verbosity<InfoLevel>,
29}
30
31impl MotifFinder {
32 pub fn exec(mut self) -> Result<(), Error> {
33 let dt = Utc::now();
34 let start_time: i64 = dt.timestamp_micros();
35 println!("Welcome to MotifFinder!");
36 let sequences = load_data(&self.global_opts.input_file, self.global_opts.num_entries)?;
37 self.global_opts.num_entries = sequences.len();
38 let GlobalOpts { k, .. } = self.global_opts;
39
40 let (file, file_path) = if let Some(save_flag) = &self.global_opts.output_file {
41 let (mut file, file_path) = create_output_file(save_flag, k, start_time)?;
42 match write_file_header(
43 &mut file,
44 self.global_opts.k,
45 self.global_opts.num_entries,
46 &self.command,
47 dt,
48 ) {
49 Ok(()) => {
50 trace!("Wrote file header to {}", file_path);
51 }
52 Err(_err) => return Err(Error::IOError),
53 }
54 (Some(file), Some(file_path))
55 } else {
56 (None, None)
57 };
58 let command_clone = (self.command).clone();
59 let motifs = match self.command {
60 Commands::GibbsSampler {
61 num_iterations,
62 num_runs,
63 } => run_gibbs_sampler(&sequences, k, num_runs, num_iterations),
64 Commands::MedianString => run_median_string(&sequences, k),
65 Commands::Randomized { num_runs } => {
66 run_randomized_motif_search(&sequences, k, num_runs)
67 }
68 Commands::FindMotif { motif, distance } => {
69 align_motifs_distance(&sequences, &motif, distance);
70 Ok(vec![motif])
71 }
72 }?;
73 let unique_motifs: Vec<String> = unique_motifs(&motifs).into_par_iter().collect();
74 let unique_motifs_string = generate_vector_space_delimited(&unique_motifs);
75 println!("Unique motifs: {}", unique_motifs_string);
76 let consensus_string = generate_consensus_string(&motifs, k)?;
77 println!("Consensus string: {}", consensus_string);
78
79 let (best_motif_score, best_motif) = if self.global_opts.align {
80 let top_five = align_motifs_multi_threaded(&sequences, &unique_motifs)?;
81 println!("Top 5 motifs:");
82 for (score, motif) in &top_five {
83 println!("{}: {}", score, motif);
84 }
85 let (best_motif_score, best_motif) = top_five[0].clone();
86 align_motifs_distance(&sequences, &consensus_string, 1);
87 (Some(best_motif_score), Some(best_motif))
88 } else {
89 (None, None)
90 };
91 let dt_end = if let Some(mut file) = file {
92 let summary = Summary {
93 consensus_string,
94 best_motif,
95 best_motif_score,
96 unique_motifs: unique_motifs_string,
97 };
98 match output_results_to_file(&mut file, &motifs, &summary, command_clone) {
99 Ok(dt_end) => {
100 println!("Results saved to {}", file_path.ok_or(Error::IOError)?);
101 dt_end
102 }
103 Err(_err) => {
104 error!(
105 "Error writing to file: {}",
106 file_path.ok_or(Error::IOError)?
107 );
108 return Err(Error::IOError);
109 }
110 }
111 } else {
112 Utc::now()
113 };
114
115 if let Some(duration) = dt_end.signed_duration_since(dt).num_microseconds() {
116 info!("Done in {} seconds", duration as f64 / 1_000_000.0);
117 }
118 Ok(())
119 }
120}
121
122#[derive(Debug, Args)]
123struct GlobalOpts {
124 input_file: String,
126
127 #[arg(short = 'e', long = "entries")]
129 pub num_entries: usize,
130
131 #[arg(short,value_parser=k_in_range,default_value_t=8)]
133 pub k: usize,
134
135 #[arg(short = 'a', long = "align")]
137 align: bool,
138
139 #[arg(short = 'o', long = "output")]
141 output_file: Option<Option<String>>,
142}
143const K_RANGE: RangeInclusive<usize> = 1..=64;
144
145fn k_in_range(s: &str) -> Result<usize, String> {
146 let k: usize = s
147 .parse()
148 .map_err(|_| format!("`{s}` isn't a valid k value"))?;
149 if K_RANGE.contains(&k) {
150 Ok(k)
151 } else {
152 Err(format!(
153 "k not in range {}-{}",
154 K_RANGE.start(),
155 K_RANGE.end()
156 ))
157 }
158}
159#[derive(Subcommand, Debug, Clone)]
160pub enum Commands {
161 #[clap(name = "gibbs", about = "Run the Gibbs Sampler algorithm")]
162 GibbsSampler {
163 #[arg(short = 'r', long = "runs")]
165 num_runs: usize,
166
167 #[arg(short = 't', long = "iters")]
169 num_iterations: usize,
170 },
171
172 #[clap(
173 name = "median",
174 about = "Run the Median String algorithm (Warning: this can take a long time to run for large values of k)"
175 )]
176 MedianString,
177
178 #[clap(
179 name = "randomized",
180 about = "Run the Randomized Motif Search algorithm"
181 )]
182 Randomized {
183 #[arg(short = 'r', long = "runs")]
185 num_runs: usize,
186 },
187 #[clap(name = "find_motif", about = "Find a motif in a genome")]
188 FindMotif {
189 motif: String,
191
192 #[arg(default_value_t = 0)]
194 distance: u8,
195 },
196}
197
198pub struct Summary {
199 pub consensus_string: String,
200 pub unique_motifs: String,
201 pub best_motif: Option<String>,
202 pub best_motif_score: Option<isize>,
203}