1use std::path::PathBuf;
4
5use anyhow::Context;
6use clap::ArgGroup;
7use clap::Args;
8use indicatif::ProgressBar;
9use indicatif::ProgressStyle;
10use rand::prelude::*;
11use tracing::info;
12
13use crate::generate::providers::reference_provider::ReferenceGenomeSequenceProvider;
14use crate::generate::providers::SequenceProvider;
15use crate::utils::formats;
16
17pub fn error_rate_in_range(error_rate_raw: &str) -> Result<f32, String> {
20 let error_rate: f32 = error_rate_raw
21 .parse()
22 .map_err(|_| format!("{} isn't a float", error_rate_raw))?;
23
24 match (0.0..=1.0).contains(&error_rate) {
25 true => Ok(error_rate),
26 false => Err(String::from("Error rate must be between 0.0 and 1.0")),
27 }
28}
29
30#[derive(Args)]
32#[command(group(ArgGroup::new("record-count").required(true).args(["coverage", "num_records"])))]
33pub struct GenerateArgs {
34 read_ones_file: PathBuf,
36
37 read_twos_file: PathBuf,
39
40 #[arg(required = true)] reference_providers: Vec<String>,
43
44 #[arg(short, long, value_name = "F32", default_value = "0.0001")]
46 #[arg(value_parser = error_rate_in_range)]
47 error_rate: Option<f32>,
48
49 #[arg(short, long, value_name = "USIZE", conflicts_with = "coverage")]
51 num_records: Option<usize>,
52
53 #[arg(short, long, value_name = "USIZE", conflicts_with = "num_records")]
55 coverage: Option<usize>,
56}
57
58pub fn generate(args: GenerateArgs) -> anyhow::Result<()> {
60 let result: anyhow::Result<Vec<_>> = args
62 .reference_providers
63 .iter()
64 .map(|provider_as_string| provider_as_string.parse::<ReferenceGenomeSequenceProvider>())
65 .collect();
66 let reference_providers = result.with_context(|| "issue parsing reference providers")?;
67
68 let reads_one_file = args.read_ones_file;
69 let reads_two_file = args.read_twos_file;
70
71 info!("Starting generate command...");
72 let mut writer_read_one = formats::fastq::writer(&reads_one_file).with_context(|| {
73 format!(
74 "Could not open reads one file: {}.",
75 reads_one_file.display()
76 )
77 })?;
78 let mut writer_read_two = formats::fastq::writer(&reads_two_file).with_context(|| {
79 format!(
80 "Could not open reads two file: {}.",
81 reads_two_file.display()
82 )
83 })?;
84
85 let mut total_reads: usize = 0;
86
87 if let Some(num_records) = args.num_records {
88 total_reads = num_records
89 } else if let Some(coverage) = args.coverage {
90 total_reads = reference_providers[0].reads_needed_for_coverage(coverage);
91 }
92
93 info!("Generating {} reads...", total_reads);
95
96 let pb = ProgressBar::new(total_reads as u64);
98 pb.set_style(
99 ProgressStyle::default_bar()
100 .template("{prefix:.cyan.bold} {spinner:.green} [{elapsed_precise}] [{bar}] {pos}/{len} ({per_sec}, {eta})")
101 .progress_chars("=> "),
102 );
103 pb.set_prefix("Generating");
104
105 let mut rng = ThreadRng::default();
107
108 let mut i = 0;
109 while i < total_reads {
110 let selected_genome = reference_providers
111 .choose_weighted(&mut rng, |x| x.weight)
112 .unwrap();
113 let read_pair = selected_genome.generate_read_pair(
114 format!("ngs:{}", selected_genome.filename),
115 (i + 1).to_string(),
116 );
117 writer_read_one
118 .write_record(read_pair.get_forward_read())
119 .with_context(|| "Could not write record to read one file.")?;
120 writer_read_two
121 .write_record(read_pair.get_reverse_read())
122 .with_context(|| "Could not write record to read two file.")?;
123
124 if i > 0 && i % 5_000 == 0 {
125 pb.inc(5000);
126 }
127
128 i += 1;
129 }
130
131 pb.set_style(
132 ProgressStyle::default_bar()
133 .template("{prefix:.green.bold} {msg:.white.bold} [{elapsed_precise}] [{bar}] {pos}/{len} ({per_sec}, {eta})")
134 .progress_chars("=> "),
135 );
136 pb.set_prefix("✓");
137 pb.finish_with_message("Finished");
138 Ok(())
139}