holodeck_lib/commands/
eval.rs1use std::collections::BTreeMap;
4use std::io::Write;
5use std::path::PathBuf;
6
7use anyhow::{Context, Result};
8use bstr::ByteSlice;
9use clap::Parser;
10use noodles::bam;
11
12use super::command::{Command, output_path};
13use super::common::OutputPrefixOptions;
14use crate::read_naming::{parse_encoded_pe_name, parse_encoded_se_name};
15
16#[derive(Parser, Debug)]
23#[command(after_long_help = "EXAMPLES:\n \
24 holodeck eval --mapped aligned.bam -o eval_results\n \
25 holodeck eval --mapped aligned.bam --truth golden.bam -o eval_results")]
26pub struct Eval {
27 #[arg(short = 'm', long, value_name = "BAM")]
29 pub mapped: PathBuf,
30
31 #[arg(long, value_name = "BAM")]
34 pub truth: Option<PathBuf>,
35
36 #[command(flatten)]
37 pub output: OutputPrefixOptions,
38
39 #[arg(long, default_value_t = 5, value_name = "INT")]
43 pub wiggle: u32,
44}
45
46#[derive(Debug, Default, Clone)]
48struct BinCounts {
49 correct: u64,
51 mismapped: u64,
53 unmapped: u64,
55 total: u64,
57}
58
59impl Command for Eval {
60 fn execute(&self) -> Result<()> {
61 if self.truth.is_some() {
62 log::warn!("--truth (golden BAM) is not yet implemented; using read names");
63 }
64
65 let mut reader = bam::io::reader::Builder
66 .build_from_path(&self.mapped)
67 .with_context(|| format!("Failed to open BAM: {}", self.mapped.display()))?;
68
69 let header = reader.read_header()?;
70
71 let mut bins: BTreeMap<u8, BinCounts> = BTreeMap::new();
73 let mut total_reads: u64 = 0;
74 let mut parse_failures: u64 = 0;
75
76 for result in reader.records() {
77 let record = result.with_context(|| "Failed to read BAM record")?;
78
79 let flags = record.flags();
81 if flags.is_secondary() || flags.is_supplementary() {
82 continue;
83 }
84 total_reads += 1;
85
86 let name_bytes = record.name().map_or(&b""[..], |n| n.as_bytes());
88 let name = name_bytes.to_str().unwrap_or("");
89
90 let truth = parse_encoded_pe_name(name)
92 .map(|(_, r1, _)| r1)
93 .or_else(|| parse_encoded_se_name(name).map(|(_, r1)| r1));
94
95 let Some(truth) = truth else {
96 parse_failures += 1;
97 continue;
98 };
99
100 let mapq = record.mapping_quality().map_or(0, u8::from);
101 let bin_key = mapq_bin(mapq);
102 let counts = bins.entry(bin_key).or_default();
103 counts.total += 1;
104
105 if flags.is_unmapped() {
106 counts.unmapped += 1;
107 continue;
108 }
109
110 let mapped_contig_idx = record.reference_sequence_id().and_then(Result::ok);
112 let mapped_pos = record.alignment_start().and_then(Result::ok).map(usize::from);
113
114 let (Some(mapped_contig_idx), Some(mapped_pos_1based)) =
115 (mapped_contig_idx, mapped_pos)
116 else {
117 counts.mismapped += 1;
118 continue;
119 };
120
121 let Some((contig_name, _)) = header.reference_sequences().get_index(mapped_contig_idx)
123 else {
124 counts.mismapped += 1;
125 continue;
126 };
127 let mapped_contig = String::from_utf8_lossy(contig_name.as_ref());
128
129 #[expect(clippy::cast_possible_truncation, reason = "mapped positions fit u32")]
131 let mapped_pos_u32 = mapped_pos_1based as u32;
132 let is_correct = mapped_contig == truth.contig
133 && mapped_pos_u32.abs_diff(truth.position) <= self.wiggle;
134
135 if is_correct {
136 counts.correct += 1;
137 } else {
138 counts.mismapped += 1;
139 }
140 }
141
142 if parse_failures > 0 {
143 log::warn!("{parse_failures} reads had unparseable names; skipped");
144 }
145
146 let output_file = output_path(&self.output.output, ".eval.txt");
148 let mut out = std::fs::File::create(&output_file)
149 .with_context(|| format!("Failed to create {}", output_file.display()))?;
150
151 writeln!(
152 out,
153 "mapq_bin\ttotal\tcorrect\tmismapped\tunmapped\tpct_correct\tpct_mismapped\tpct_unmapped"
154 )?;
155
156 let mut grand_total = BinCounts::default();
157 for (&bin, counts) in &bins {
158 write_bin_row(&mut out, &format_bin_label(bin), counts)?;
159 grand_total.correct += counts.correct;
160 grand_total.mismapped += counts.mismapped;
161 grand_total.unmapped += counts.unmapped;
162 grand_total.total += counts.total;
163 }
164 write_bin_row(&mut out, "ALL", &grand_total)?;
165
166 log::info!(
167 "Evaluated {total_reads} reads: {} correct, {} mismapped, {} unmapped",
168 grand_total.correct,
169 grand_total.mismapped,
170 grand_total.unmapped
171 );
172 log::info!("Results written to: {}", output_file.display());
173
174 Ok(())
175 }
176}
177
178fn mapq_bin(mapq: u8) -> u8 {
180 match mapq {
181 0 => 0,
182 1..=9 => 1,
183 10..=19 => 10,
184 20..=29 => 20,
185 30..=39 => 30,
186 40..=49 => 40,
187 50..=59 => 50,
188 _ => 60,
189 }
190}
191
192fn format_bin_label(bin: u8) -> String {
194 match bin {
195 0 => "0".to_string(),
196 1 => "1-9".to_string(),
197 60 => "60+".to_string(),
198 _ => format!("{}-{}", bin, bin + 9),
199 }
200}
201
202fn write_bin_row(out: &mut impl Write, label: &str, counts: &BinCounts) -> Result<()> {
204 let total = counts.total.max(1) as f64;
205 writeln!(
206 out,
207 "{label}\t{}\t{}\t{}\t{}\t{:.2}\t{:.2}\t{:.2}",
208 counts.total,
209 counts.correct,
210 counts.mismapped,
211 counts.unmapped,
212 counts.correct as f64 / total * 100.0,
213 counts.mismapped as f64 / total * 100.0,
214 counts.unmapped as f64 / total * 100.0,
215 )?;
216 Ok(())
217}
218
219#[cfg(test)]
220mod tests {
221 use super::*;
222
223 #[test]
224 fn test_mapq_bin() {
225 assert_eq!(mapq_bin(0), 0);
226 assert_eq!(mapq_bin(5), 1);
227 assert_eq!(mapq_bin(10), 10);
228 assert_eq!(mapq_bin(15), 10);
229 assert_eq!(mapq_bin(30), 30);
230 assert_eq!(mapq_bin(60), 60);
231 assert_eq!(mapq_bin(255), 60);
232 }
233
234 #[test]
235 fn test_format_bin_label() {
236 assert_eq!(format_bin_label(0), "0");
237 assert_eq!(format_bin_label(1), "1-9");
238 assert_eq!(format_bin_label(10), "10-19");
239 assert_eq!(format_bin_label(60), "60+");
240 }
241
242 #[test]
243 fn test_write_bin_row() {
244 let counts = BinCounts { correct: 90, mismapped: 8, unmapped: 2, total: 100 };
245 let mut buf = Vec::new();
246 write_bin_row(&mut buf, "30-39", &counts).unwrap();
247 let line = String::from_utf8(buf).unwrap();
248 assert!(line.starts_with("30-39\t100\t90\t8\t2\t"));
249 assert!(line.contains("90.00"));
250 }
251}