cyto_ibu_barcode_correct/
lib.rs1mod stats;
2mod whitelist;
3
4pub use stats::{CorrectStats, FormattedStats};
5pub use whitelist::Whitelist;
6
7use std::path::Path;
8
9use anyhow::Result;
10use cyto_cli::ibu::ArgsBarcode;
11use cyto_io::{match_input, match_output, match_output_stderr};
12use ibu::{Reader, Record, Writer};
13use log::trace;
14
15use crate::whitelist::Correction;
16
17fn write_statistics<P: AsRef<Path>>(path: Option<P>, stats: CorrectStats) -> Result<()> {
18 let mut writer = match_output_stderr(path)?;
19 let format_stats = FormattedStats::new(stats);
20 serde_json::to_writer_pretty(&mut writer, &format_stats)?;
21 writer.flush()?;
22 Ok(())
23}
24
25pub fn run_with_prebuilt_whitelist(args: &ArgsBarcode, mut whitelist: Whitelist) -> Result<()> {
27 let input = match_input(args.input.input.as_ref())?;
29
30 let reader = Reader::new(input)?;
32 let header = reader.header();
33
34 let output = match_output(args.options.output.as_ref())?;
36 let mut writer = Writer::new(output, header)?;
37
38 let mut stats = CorrectStats::default();
40 let mut second_pass = Vec::new();
41
42 trace!(
43 "Starting first pass [file: {}, exact_match: {}]",
44 args.input.input.as_deref().unwrap_or("stdin"),
45 args.options.exact
46 );
47 for record in reader {
48 let record = record?;
49 let barcode = record.barcode;
50 stats.total += 1;
51
52 match whitelist.correct_to(barcode, args.options.exact) {
54 Correction::Ambiguous => {
55 if args.options.skip_second_pass {
56 stats.ambiguous += 1;
57 if args.options.include {
58 writer.write_record(&record)?;
59 }
60 } else {
61 second_pass.push(record); }
63 }
64 Correction::Unchanged => {
65 stats.matched += 1;
66 stats.unchanged += 1;
67 whitelist.increment(barcode);
68 writer.write_record(&record)?;
69 }
70 Correction::Corrected(corrected) => {
71 stats.matched += 1;
72 stats.corrected += 1;
73 whitelist.increment(corrected);
74 let new_record = Record::new(corrected, record.umi, record.index);
75 writer.write_record(&new_record)?;
76 }
77 }
78 }
79
80 if !second_pass.is_empty() && !args.options.exact {
81 trace!(
82 "Starting second pass (ambiguous subset) [file: {}]...",
83 args.input.input.as_deref().unwrap_or("stdin")
84 );
85 for record in second_pass {
86 match whitelist.ambiguously_correct_to_(record.barcode) {
87 Correction::Ambiguous => {
88 stats.ambiguous += 1;
89 if args.options.include {
91 writer.write_record(&record)?;
92 }
93 }
94 Correction::Unchanged => {
95 stats.matched += 1;
96 stats.unchanged += 1;
97 writer.write_record(&record)?;
98 }
99 Correction::Corrected(corrected) => {
100 stats.matched += 1;
101 stats.corrected += 1;
102 stats.corrected_via_counts += 1;
103 let new_record = Record::new(corrected, record.umi, record.index);
104 writer.write_record(&new_record)?;
105 }
106 }
107 }
108 }
109
110 writer.finish()?;
112
113 write_statistics(args.options.log.as_ref(), stats)?;
115 Ok(())
116}
117
118pub fn run(args: &ArgsBarcode) -> Result<()> {
119 let whitelist = Whitelist::from_path(&args.options.whitelist)?;
120 run_with_prebuilt_whitelist(args, whitelist)
121}