motif_finder/
command.rs

1use std::ops::RangeInclusive;
2
3use crate::{
4    align_motifs_multi_threaded,
5    alignment::align_motifs_distance,
6    generate_consensus_string, load_data, run_gibbs_sampler, run_median_string,
7    run_randomized_motif_search, unique_motifs,
8    utils::{
9        create_output_file, generate_vector_space_delimited, output_results_to_file,
10        write_file_header,
11    },
12    Error,
13};
14use chrono::Utc;
15use clap::{Args, Parser, Subcommand};
16use clap_verbosity_flag::InfoLevel;
17use rayon::prelude::*;
18use tracing::{error, info, trace};
19/// Motif Finder
20#[derive(Debug, Parser)]
21#[command(author, version, about, long_about = None)]
22pub struct MotifFinder {
23    #[clap(flatten)]
24    global_opts: GlobalOpts,
25    #[command(subcommand)]
26    pub command: Commands,
27    #[clap(flatten)]
28    pub verbose: clap_verbosity_flag::Verbosity<InfoLevel>,
29}
30
31impl MotifFinder {
32    pub fn exec(mut self) -> Result<(), Error> {
33        let dt = Utc::now();
34        let start_time: i64 = dt.timestamp_micros();
35        println!("Welcome to MotifFinder!");
36        let sequences = load_data(&self.global_opts.input_file, self.global_opts.num_entries)?;
37        self.global_opts.num_entries = sequences.len();
38        let GlobalOpts { k, .. } = self.global_opts;
39
40        let (file, file_path) = if let Some(save_flag) = &self.global_opts.output_file {
41            let (mut file, file_path) = create_output_file(save_flag, k, start_time)?;
42            match write_file_header(
43                &mut file,
44                self.global_opts.k,
45                self.global_opts.num_entries,
46                &self.command,
47                dt,
48            ) {
49                Ok(()) => {
50                    trace!("Wrote file header to {}", file_path);
51                }
52                Err(_err) => return Err(Error::IOError),
53            }
54            (Some(file), Some(file_path))
55        } else {
56            (None, None)
57        };
58        let command_clone = (self.command).clone();
59        let motifs = match self.command {
60            Commands::GibbsSampler {
61                num_iterations,
62                num_runs,
63            } => run_gibbs_sampler(&sequences, k, num_runs, num_iterations),
64            Commands::MedianString => run_median_string(&sequences, k),
65            Commands::Randomized { num_runs } => {
66                run_randomized_motif_search(&sequences, k, num_runs)
67            }
68            Commands::FindMotif { motif, distance } => {
69                align_motifs_distance(&sequences, &motif, distance);
70                Ok(vec![motif])
71            }
72        }?;
73        let unique_motifs: Vec<String> = unique_motifs(&motifs).into_par_iter().collect();
74        let unique_motifs_string = generate_vector_space_delimited(&unique_motifs);
75        println!("Unique motifs: {}", unique_motifs_string);
76        let consensus_string = generate_consensus_string(&motifs, k)?;
77        println!("Consensus string: {}", consensus_string);
78
79        let (best_motif_score, best_motif) = if self.global_opts.align {
80            let top_five = align_motifs_multi_threaded(&sequences, &unique_motifs)?;
81            println!("Top 5 motifs:");
82            for (score, motif) in &top_five {
83                println!("{}: {}", score, motif);
84            }
85            let (best_motif_score, best_motif) = top_five[0].clone();
86            align_motifs_distance(&sequences, &consensus_string, 1);
87            (Some(best_motif_score), Some(best_motif))
88        } else {
89            (None, None)
90        };
91        let dt_end = if let Some(mut file) = file {
92            let summary = Summary {
93                consensus_string,
94                best_motif,
95                best_motif_score,
96                unique_motifs: unique_motifs_string,
97            };
98            match output_results_to_file(&mut file, &motifs, &summary, command_clone) {
99                Ok(dt_end) => {
100                    println!("Results saved to {}", file_path.ok_or(Error::IOError)?);
101                    dt_end
102                }
103                Err(_err) => {
104                    error!(
105                        "Error writing to file: {}",
106                        file_path.ok_or(Error::IOError)?
107                    );
108                    return Err(Error::IOError);
109                }
110            }
111        } else {
112            Utc::now()
113        };
114
115        if let Some(duration) = dt_end.signed_duration_since(dt).num_microseconds() {
116            info!("Done in {} seconds", duration as f64 / 1_000_000.0);
117        }
118        Ok(())
119    }
120}
121
122#[derive(Debug, Args)]
123struct GlobalOpts {
124    /// file path of the genome
125    input_file: String,
126
127    /// how many entries to read
128    #[arg(short = 'e', long = "entries")]
129    pub num_entries: usize,
130
131    /// motif length
132    #[arg(short,value_parser=k_in_range,default_value_t=8)]
133    pub k: usize,
134
135    /// alignment
136    #[arg(short = 'a', long = "align")]
137    align: bool,
138
139    /// save motifs to file
140    #[arg(short = 'o', long = "output")]
141    output_file: Option<Option<String>>,
142}
143const K_RANGE: RangeInclusive<usize> = 1..=64;
144
145fn k_in_range(s: &str) -> Result<usize, String> {
146    let k: usize = s
147        .parse()
148        .map_err(|_| format!("`{s}` isn't a valid k value"))?;
149    if K_RANGE.contains(&k) {
150        Ok(k)
151    } else {
152        Err(format!(
153            "k not in range {}-{}",
154            K_RANGE.start(),
155            K_RANGE.end()
156        ))
157    }
158}
159#[derive(Subcommand, Debug, Clone)]
160pub enum Commands {
161    #[clap(name = "gibbs", about = "Run the Gibbs Sampler algorithm")]
162    GibbsSampler {
163        /// number of runs
164        #[arg(short = 'r', long = "runs")]
165        num_runs: usize,
166
167        /// number of iterations per run
168        #[arg(short = 't', long = "iters")]
169        num_iterations: usize,
170    },
171
172    #[clap(
173        name = "median",
174        about = "Run the Median String algorithm (Warning: this can take a long time to run for large values of k)"
175    )]
176    MedianString,
177
178    #[clap(
179        name = "randomized",
180        about = "Run the Randomized Motif Search algorithm"
181    )]
182    Randomized {
183        /// number of runs
184        #[arg(short = 'r', long = "runs")]
185        num_runs: usize,
186    },
187    #[clap(name = "find_motif", about = "Find a motif in a genome")]
188    FindMotif {
189        /// motif to find
190        motif: String,
191
192        /// max distance of motif from sequence
193        #[arg(default_value_t = 0)]
194        distance: u8,
195    },
196}
197
198pub struct Summary {
199    pub consensus_string: String,
200    pub unique_motifs: String,
201    pub best_motif: Option<String>,
202    pub best_motif_score: Option<isize>,
203}