1use anyhow::{anyhow, Result};
2use chrono::{DateTime, Local};
3use num_format::{Locale, ToFormattedString};
4use std::{
5 fs::{File, OpenOptions},
6 io::{stdout, Write},
7 path::Path,
8 sync::{
9 atomic::{AtomicU32, Ordering},
10 Arc, Mutex,
11 },
12};
13
14use ahash::{AHashSet, HashMap, HashMapExt};
15
16use itertools::Itertools;
17
18use crate::{
19 arguments::Args,
20 info::{
21 MaxSeqErrors, Results, ResultsEnrichment, ResultsHashmap, SequenceErrors, SequenceFormat,
22 },
23};
24
/// Selects which count table a file-writing pass reads from.
///
/// The enum carries no data, so it is `Copy`; callers may still call `.clone()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EnrichedType {
    /// Counts stored in `ResultsEnrichment::single_hashmap`.
    Single,
    /// Counts stored in `ResultsEnrichment::double_hashmap`.
    Double,
    /// Counts stored in `Results::results_hashmap`.
    Full,
}
31
32pub struct WriteFiles {
34 results: Results,
35 results_enriched: ResultsEnrichment,
36 sequence_format: SequenceFormat,
37 counted_barcodes_hash: Vec<HashMap<String, String>>,
38 samples_barcode_hash: HashMap<String, String>,
39 compounds_written: AHashSet<String>,
40 args: Args,
41 output_files: Vec<String>,
42 output_counts: Vec<usize>,
43 merged_count: usize,
44 merge_text: String,
45 sample_text: String,
46}
47
48impl WriteFiles {
49 pub fn new(
50 results_arc: Arc<Mutex<Results>>,
51 sequence_format: SequenceFormat,
52 counted_barcodes_hash: Vec<HashMap<String, String>>,
53 samples_barcode_hash: HashMap<String, String>,
54 args: Args,
55 ) -> Result<Self> {
56 let results = Arc::try_unwrap(results_arc).unwrap().into_inner().unwrap();
57 Ok(WriteFiles {
58 results,
59 results_enriched: ResultsEnrichment::new(),
60 sequence_format,
61 counted_barcodes_hash,
62 samples_barcode_hash,
63 compounds_written: AHashSet::new(),
64 args,
65 output_files: Vec::new(),
66 output_counts: Vec::new(),
67 merged_count: 0,
68 merge_text: String::new(),
69 sample_text: String::new(),
70 })
71 }
72
73 pub fn write_counts_files(&mut self) -> Result<()> {
75 let unknown_sample = "barcode".to_string();
76 let mut sample_barcodes = match &self.results.results_hashmap {
78 ResultsHashmap::RandomBarcode(random_hashmap) => {
79 random_hashmap.keys().cloned().collect::<Vec<String>>()
80 }
81 ResultsHashmap::NoRandomBarcode(count_hashmap) => {
82 count_hashmap.keys().cloned().collect::<Vec<String>>()
83 }
84 };
85
86 if self.args.enrich {
87 self.results_enriched.add_sample_barcodes(&sample_barcodes);
88 }
89
90 if !self.samples_barcode_hash.is_empty() {
92 sample_barcodes.sort_by_key(|barcode| {
93 self.samples_barcode_hash
94 .get(barcode)
95 .unwrap_or(&unknown_sample)
96 })
97 }
98
99 let output_dir = self.args.output_dir.clone();
101 let directory = Path::new(&output_dir);
102
103 let mut header = self.create_header();
104 if self.args.merge_output {
106 if sample_barcodes.len() == 1 {
107 eprintln!("Merged file cannot be created without multiple sample barcodes");
108 println!();
109 self.args.merge_output = false;
110 } else {
111 let mut merged_header = header.clone();
113 for sample_barcode in &sample_barcodes {
114 let sample_name = if self.samples_barcode_hash.is_empty() {
115 sample_barcode
116 } else {
117 self
119 .samples_barcode_hash
120 .get(sample_barcode)
121 .unwrap_or(&unknown_sample)
122 };
123 merged_header.push(',');
124 merged_header.push_str(sample_name);
125 }
126 merged_header.push('\n');
127 self.merge_text.push_str(&merged_header);
128 }
129 }
130
131 header.push_str(",Count\n");
133
134 for sample_barcode in &sample_barcodes {
136 let sample_name = if !self.samples_barcode_hash.is_empty() {
137 self
138 .samples_barcode_hash
139 .get(sample_barcode)
140 .unwrap_or(&unknown_sample)
141 } else {
142 sample_barcode
143 };
144 let file_name = format!("{}_{}_counts.csv", self.args.prefix, sample_name);
145 println!("{}", file_name);
146 self.output_files.push(file_name.clone());
147 let output_path = directory.join(file_name);
149
150 self.sample_text.push_str(&header);
151 let count =
152 self.add_counts_string(sample_barcode, &sample_barcodes, EnrichedType::Full)?;
153
154 let mut output = File::create(output_path)?; output.write_all(self.sample_text.as_bytes())?;
156 self.sample_text.clear();
157 self.output_counts.push(count);
158 }
159 if self.args.merge_output {
160 let merged_file_name = format!("{}{}", self.args.prefix, "_counts.all.csv");
161 println!("{}", merged_file_name);
162 println!(
163 "Barcodes counted: {}",
164 self.merged_count.to_formatted_string(&Locale::en)
165 );
166 self.output_files.push(merged_file_name.clone());
167 let merged_output_path = directory.join(merged_file_name);
168 let mut merged_output_file = File::create(merged_output_path)?;
169 merged_output_file.write_all(self.merge_text.as_bytes())?;
170 self.merge_text.clear();
171 self.output_counts.insert(0, self.merged_count);
172 self.merged_count = 0;
173 }
174 if self.args.enrich {
175 self.write_enriched_files(EnrichedType::Single)?;
176 if self.sequence_format.barcode_num > 2 {
177 self.write_enriched_files(EnrichedType::Double)?;
178 }
179 }
180 Ok(())
181 }
182
183 fn create_header(&self) -> String {
185 let mut header = String::new();
187 if self.sequence_format.barcode_num > 1 {
188 header = "Barcode_1".to_string();
189 for num in 1..self.sequence_format.barcode_num {
190 header.push_str(&format!(",Barcode_{}", num + 1))
191 }
192 } else {
193 header.push_str("Barcode")
194 }
195 header
196 }
197
198 fn add_counts_string(
200 &mut self,
201 sample_barcode: &str,
202 sample_barcodes: &[String],
203 enrichment: EnrichedType, ) -> Result<usize> {
205 let mut hash_holder: HashMap<String, HashMap<String, usize>> = HashMap::new(); let codes = match enrichment {
208 EnrichedType::Single => {
209 hash_holder = self.results_enriched.single_hashmap.clone();
210 hash_holder
211 .get(sample_barcode)
212 .unwrap()
213 .keys()
214 .cloned()
215 .collect::<Vec<String>>()
216 }
217 EnrichedType::Double => {
218 hash_holder = self.results_enriched.double_hashmap.clone();
219 hash_holder
220 .get(sample_barcode)
221 .unwrap()
222 .keys()
223 .cloned()
224 .collect::<Vec<String>>()
225 }
226 EnrichedType::Full => match &self.results.results_hashmap {
227 ResultsHashmap::NoRandomBarcode(count_hashmap) => count_hashmap
228 .get(sample_barcode)
229 .unwrap()
230 .keys()
231 .cloned()
232 .collect::<Vec<String>>(),
233 ResultsHashmap::RandomBarcode(random_hashmap) => random_hashmap
234 .get(sample_barcode)
235 .unwrap()
236 .keys()
237 .cloned()
238 .collect::<Vec<String>>(),
239 },
240 };
241
242 let mut barcode_num = 0;
243 for (line_num, code) in codes.iter().enumerate() {
244 let count = match enrichment {
245 EnrichedType::Single => *self
246 .results_enriched
247 .single_hashmap
248 .get(sample_barcode)
249 .unwrap()
250 .get(code)
251 .unwrap(),
252 EnrichedType::Double => *self
253 .results_enriched
254 .double_hashmap
255 .get(sample_barcode)
256 .unwrap()
257 .get(code)
258 .unwrap(),
259 EnrichedType::Full => match &self.results.results_hashmap {
260 ResultsHashmap::NoRandomBarcode(count_hashmap) => *count_hashmap
261 .get(sample_barcode)
262 .unwrap()
263 .get(code)
264 .unwrap(),
265 ResultsHashmap::RandomBarcode(random_hashmap) => random_hashmap
266 .get(sample_barcode)
267 .unwrap()
268 .get(code)
269 .unwrap()
270 .len(),
271 },
272 };
273 barcode_num = line_num + 1;
274 if barcode_num % 50000 == 0 {
276 print!(
277 "Barcodes counted: {}\r",
278 barcode_num.to_formatted_string(&Locale::en)
279 );
280 stdout().flush()?;
281 }
282 let written_barcodes = if enrichment == EnrichedType::Full && !self.counted_barcodes_hash.is_empty() {
283 convert_code(code, &self.counted_barcodes_hash)
285 } else {
286 code.to_string()
287 };
288
289 if self.args.merge_output {
291 let new = self.compounds_written.insert(code.to_string());
293 if new {
294 self.merged_count += 1;
295 let mut merged_row = written_barcodes.clone();
297 for sample_barcode in sample_barcodes {
299 merged_row.push(',');
300 let sample_count = match enrichment {
302 EnrichedType::Single => hash_holder
303 .get(sample_barcode)
304 .unwrap()
305 .get(code)
306 .unwrap_or(&0)
307 .to_string(),
308
309 EnrichedType::Double => hash_holder
310 .get(sample_barcode)
311 .unwrap()
312 .get(code)
313 .unwrap_or(&0)
314 .to_string(),
315
316 EnrichedType::Full => match &self.results.results_hashmap {
317 ResultsHashmap::RandomBarcode(random_hashmap) => random_hashmap
318 .get(sample_barcode)
319 .unwrap()
320 .get(code)
321 .unwrap_or(&AHashSet::new())
322 .len()
323 .to_string(),
324 ResultsHashmap::NoRandomBarcode(count_hashmap) => count_hashmap
325 .get(sample_barcode)
326 .unwrap()
327 .get(code)
328 .unwrap_or(&0)
329 .to_string(),
330 },
331 };
332 merged_row.push_str(&sample_count);
333 }
334 merged_row.push('\n');
335 self.merge_text.push_str(&merged_row);
337 }
338 }
339 let row = format!("{},{}\n", written_barcodes, count);
341 self.sample_text.push_str(&row);
342 if enrichment == EnrichedType::Full && self.args.enrich {
347 self.results_enriched
348 .add_single(sample_barcode, &written_barcodes, count);
349 if self.sequence_format.barcode_num > 2 {
350 self.results_enriched
351 .add_double(sample_barcode, &written_barcodes, count);
352 }
353 }
354 }
355 print!(
356 "Barcodes counted: {}\r",
357 barcode_num.to_formatted_string(&Locale::en)
358 );
359 println!();
360 Ok(barcode_num)
361 }
362
363 fn write_enriched_files(&mut self, enrichment: EnrichedType) -> Result<()> {
365 let unknown_sample = "barcode".to_string();
366 let mut sample_barcodes = match enrichment {
368 EnrichedType::Single => self
369 .results_enriched
370 .single_hashmap
371 .keys()
372 .cloned()
373 .collect::<Vec<String>>(),
374 EnrichedType::Double => self
375 .results_enriched
376 .double_hashmap
377 .keys()
378 .cloned()
379 .collect::<Vec<String>>(),
380 EnrichedType::Full => {
381 return Err(anyhow!(
382 "Does not work with Full enrichment type. Only Single and Double"
383 ))
384 }
385 };
386
387 if !self.samples_barcode_hash.is_empty() {
389 sample_barcodes.sort_by_key(|barcode| {
390 self.samples_barcode_hash
391 .get(barcode)
392 .unwrap_or(&unknown_sample)
393 })
394 }
395
396 let descriptor = match enrichment {
398 EnrichedType::Single => "Single",
399 EnrichedType::Double => "Double",
400 EnrichedType::Full => {
401 return Err(anyhow!(
402 "Does not work with Full enrichment type. Only Single and Double"
403 ))
404 }
405 };
406
407 let output_dir = self.args.output_dir.clone();
409 let directory = Path::new(&output_dir);
410
411 let mut header = self.create_header();
412 if self.args.merge_output {
414 let mut merged_header = header.clone();
415 for sample_barcode in &sample_barcodes {
416 let sample_name = if self.samples_barcode_hash.is_empty() {
417 sample_barcode
418 } else {
419 self
421 .samples_barcode_hash
422 .get(sample_barcode)
423 .unwrap_or(&unknown_sample)
424 };
425 merged_header.push(',');
426 merged_header.push_str(sample_name);
427 }
428 merged_header.push('\n');
429 self.merge_text.push_str(&merged_header);
430 }
431
432 header.push_str(",Count\n");
434
435 for sample_barcode in &sample_barcodes {
437 let sample_name = if !self.samples_barcode_hash.is_empty() {
439 self
440 .samples_barcode_hash
441 .get(sample_barcode)
442 .unwrap_or(&unknown_sample)
443 } else {
444 sample_barcode
445 };
446 let file_name = format!(
447 "{}_{}_counts.{}.csv",
448 self.args.prefix, sample_name, descriptor
449 );
450 println!("{}", file_name);
451 self.output_files.push(file_name.clone());
452 let output_path = directory.join(file_name);
454
455 self.sample_text.push_str(&header);
456 let count =
457 self.add_counts_string(sample_barcode, &sample_barcodes, enrichment.clone())?;
458 let mut output = File::create(output_path)?; output.write_all(self.sample_text.as_bytes())?;
460 self.sample_text.clear();
461 self.output_counts.push(count);
463 }
464 if self.args.merge_output {
466 let merged_file_name = format!("{}_counts.all.{}.csv", self.args.prefix, descriptor);
468 println!("{}", merged_file_name);
469 self.output_files.push(merged_file_name.clone());
470 let merged_output_path = directory.join(merged_file_name);
471 let mut merged_output_file = File::create(merged_output_path)?;
472 merged_output_file.write_all(self.merge_text.as_bytes())?;
473 println!(
474 "Barcodes counted: {}",
475 self.merged_count.to_formatted_string(&Locale::en)
476 );
477 self.merge_text.clear();
478 self.output_counts.insert(
479 self.output_counts.len() - sample_barcodes.len(),
480 self.merged_count,
481 );
482 self.merged_count = 0;
483 }
484 Ok(())
485 }
486
487 pub fn write_stats_file(
489 &self,
490 start_time: DateTime<Local>,
491 max_sequence_errors: MaxSeqErrors,
492 seq_errors: SequenceErrors,
493 total_reads: Arc<AtomicU32>,
494 sequence_format: SequenceFormat,
495 ) -> Result<()> {
496 let output_dir = self.args.output_dir.clone();
498 let directory = Path::new(&output_dir);
499 let stat_filename = directory.join(format!("{}_barcode_stats.txt", self.args.prefix));
500 let mut stat_file = OpenOptions::new()
502 .write(true)
503 .append(true)
504 .create(true)
505 .open(stat_filename)?;
506
507 let now = Local::now();
509 let elapsed_time = now - start_time;
510 stat_file.write_all(
512 format!(
513 "-TIME INFORMATION-\nStart: {}\nFinish: {}\nTotal time: {} hours, {} minutes, {}.{} seconds\n\n",
514 start_time.format("%Y-%m-%d %H:%M:%S"),
515 now.format("%Y-%m-%d %H:%M:%S"),
516 elapsed_time.num_hours(),
517 elapsed_time.num_minutes() % 60,
518 elapsed_time.num_seconds() % 60,
519 millisecond_decimal(elapsed_time)
520 )
521 .as_bytes(),
522 )?;
523 stat_file.write_all(
525 format!(
526 "-INPUT FILES-\nFastq: {}\nFormat: {}\nSamples: {}\nBarcodes: {}\n\n",
527 self.args.fastq,
528 self.args.format,
529 self.args
530 .sample_barcodes_option
531 .as_ref()
532 .unwrap_or(&"None".to_string()),
533 self.args
534 .counted_barcodes_option
535 .as_ref()
536 .unwrap_or(&"None".to_string())
537 )
538 .as_bytes(),
539 )?;
540 stat_file.write_all(format!("{}\n\n", sequence_format).as_bytes())?;
542 stat_file.write_all(format!("{}\n", max_sequence_errors).as_bytes())?;
544 stat_file.write_all(
546 format!(
547 "-RESULTS-\nTotal sequences: {}\n{}\n\n",
548 total_reads
549 .load(Ordering::Relaxed)
550 .to_formatted_string(&Locale::en),
551 seq_errors
552 )
553 .as_bytes(),
554 )?;
555 stat_file.write_all("-OUTPUT FILES-\n".as_bytes())?;
557 for (file_name, counts) in self.output_files.iter().zip(self.output_counts.iter()) {
558 stat_file.write_all(
559 format!(
560 "File & barcodes counted: {}\t{}\n",
561 file_name,
562 counts.to_formatted_string(&Locale::en)
563 )
564 .as_bytes(),
565 )?;
566 }
567 stat_file.write_all("\n".as_bytes())?;
568 if self.args.fastq.ends_with("gz") && total_reads.load(Ordering::Relaxed) < 1_000_000 {
569 let warning = "WARNING: The program may have stopped early with the gzipped file. Unzip the fastq.gz and rerun the algorithm on the unzipped fastq file if the number of reads is expected to be above 1,000,000 ";
570 println!("\n{}\n", warning);
571 stat_file.write_all(format!("\n{}\n", warning).as_bytes())?;
572 }
573 stat_file.write_all("--------------------------------------------------------------------------------------------------\n\n\n".as_bytes())?;
575 Ok(())
576 }
577}
578
579pub fn millisecond_decimal(elapsed_time: chrono::Duration) -> String {
580 let milliseconds =
581 (elapsed_time.num_milliseconds() - (elapsed_time.num_seconds() * 1000)).to_string();
582 let mut final_string = String::new();
583 for _ in milliseconds.chars().count()..3 {
584 final_string.push('0');
585 }
586 final_string.push_str(&milliseconds);
587 final_string
588}
589
/// Replaces each comma-separated barcode in `code` with its converted value.
///
/// The barcode at position `i` is looked up in `barcodes_hashmap[i]`. A barcode with no
/// entry, or a position with no table, is written unchanged instead of panicking, so an
/// incomplete conversion table cannot abort the run while the output is being written.
fn convert_code(code: &str, barcodes_hashmap: &[HashMap<String, String>]) -> String {
    code.split(',')
        .enumerate()
        .map(|(barcode_index, barcode)| {
            barcodes_hashmap
                .get(barcode_index)
                .and_then(|conversions| conversions.get(barcode))
                .map_or(barcode, |converted| converted.as_str())
        })
        .collect::<Vec<&str>>()
        .join(",")
}
600
/// Returns the sample name stored for `sample_barcode` in `sample_barcodes_hash`,
/// or the literal `"barcode"` when the barcode has no entry.
pub fn convert_sample_barcode(
    sample_barcode: &str,
    sample_barcodes_hash: &HashMap<String, String>,
) -> String {
    sample_barcodes_hash
        .get(sample_barcode)
        .cloned()
        .unwrap_or_else(|| "barcode".to_string())
}