1#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct TruthAlignment {
14 pub contig: String,
16 pub position: u32,
18 pub is_forward: bool,
20 pub haplotype: usize,
22 pub n_errors: usize,
24}
25
26#[must_use]
35pub fn encoded_pe_name(read_num: u64, r1: &TruthAlignment, r2: &TruthAlignment) -> String {
36 debug_assert_eq!(
37 r1.contig, r2.contig,
38 "R1 and R2 must be on the same contig for encoded PE names"
39 );
40 format!(
41 "holodeck:{}:{}:{}{}:{}{}:{}:{}:{}",
42 read_num,
43 r1.contig,
44 r1.position,
45 strand_char(r1.is_forward),
46 r2.position,
47 strand_char(r2.is_forward),
48 r1.haplotype,
49 r1.n_errors,
50 r2.n_errors,
51 )
52}
53
54#[must_use]
60pub fn encoded_se_name(read_num: u64, r1: &TruthAlignment) -> String {
61 format!(
62 "holodeck:{}:{}:{}{}:{}:{}",
63 read_num,
64 r1.contig,
65 r1.position,
66 strand_char(r1.is_forward),
67 r1.haplotype,
68 r1.n_errors,
69 )
70}
71
72#[must_use]
76pub fn simple_name(read_num: u64) -> String {
77 format!("holodeck:{read_num}")
78}
79
80fn strand_char(is_forward: bool) -> char {
82 if is_forward { 'F' } else { 'R' }
83}
84
85#[must_use]
93pub fn parse_encoded_pe_name(name: &str) -> Option<(u64, TruthAlignment, TruthAlignment)> {
94 let mut rev_parts: Vec<&str> = name.rsplitn(6, ':').collect();
98 if rev_parts.len() != 6 {
99 return None;
100 }
101 rev_parts.reverse();
104 let prefix = rev_parts[0];
107 let (pos1, fwd1) = parse_pos_strand(rev_parts[1])?;
108 let (pos2, fwd2) = parse_pos_strand(rev_parts[2])?;
109 let haplotype: usize = rev_parts[3].parse().ok()?;
110 let n_errors_r1: usize = rev_parts[4].parse().ok()?;
111 let n_errors_r2: usize = rev_parts[5].parse().ok()?;
112
113 let prefix = prefix.strip_prefix("holodeck:")?;
115 let (read_num_str, contig) = prefix.split_once(':')?;
116 let read_num: u64 = read_num_str.parse().ok()?;
117
118 let r1 = TruthAlignment {
119 contig: contig.to_string(),
120 position: pos1,
121 is_forward: fwd1,
122 haplotype,
123 n_errors: n_errors_r1,
124 };
125 let r2 = TruthAlignment {
126 contig: contig.to_string(),
127 position: pos2,
128 is_forward: fwd2,
129 haplotype,
130 n_errors: n_errors_r2,
131 };
132
133 Some((read_num, r1, r2))
134}
135
136#[must_use]
142pub fn parse_encoded_se_name(name: &str) -> Option<(u64, TruthAlignment)> {
143 let mut rev_parts: Vec<&str> = name.rsplitn(4, ':').collect();
146 if rev_parts.len() != 4 {
147 return None;
148 }
149 rev_parts.reverse();
150 let prefix = rev_parts[0];
153 let (pos, fwd) = parse_pos_strand(rev_parts[1])?;
154 let haplotype: usize = rev_parts[2].parse().ok()?;
155 let n_errors: usize = rev_parts[3].parse().ok()?;
156
157 let prefix = prefix.strip_prefix("holodeck:")?;
158 let (read_num_str, contig) = prefix.split_once(':')?;
159 let read_num: u64 = read_num_str.parse().ok()?;
160
161 let truth = TruthAlignment {
162 contig: contig.to_string(),
163 position: pos,
164 is_forward: fwd,
165 haplotype,
166 n_errors,
167 };
168
169 Some((read_num, truth))
170}
171
172fn parse_pos_strand(field: &str) -> Option<(u32, bool)> {
174 if field.is_empty() {
175 return None;
176 }
177 let strand_char = field.as_bytes()[field.len() - 1];
178 let is_forward = match strand_char {
179 b'F' => true,
180 b'R' => false,
181 _ => return None,
182 };
183 let pos: u32 = field[..field.len() - 1].parse().ok()?;
184 Some((pos, is_forward))
185}
186
187#[cfg(test)]
188mod tests {
189 use super::*;
190
191 fn make_truth(contig: &str, pos: u32, fwd: bool, hap: usize, errs: usize) -> TruthAlignment {
192 TruthAlignment {
193 contig: contig.to_string(),
194 position: pos,
195 is_forward: fwd,
196 haplotype: hap,
197 n_errors: errs,
198 }
199 }
200
201 #[test]
202 fn test_encoded_pe_name() {
203 let r1 = make_truth("chr1", 10000, true, 0, 2);
204 let r2 = make_truth("chr1", 10450, false, 0, 1);
205 let name = encoded_pe_name(42, &r1, &r2);
206 assert_eq!(name, "holodeck:42:chr1:10000F:10450R:0:2:1");
207 }
208
209 #[test]
210 fn test_encoded_se_name() {
211 let r1 = make_truth("chr1", 10000, true, 0, 2);
212 let name = encoded_se_name(42, &r1);
213 assert_eq!(name, "holodeck:42:chr1:10000F:0:2");
214 }
215
216 #[test]
217 fn test_simple_name() {
218 assert_eq!(simple_name(42), "holodeck:42");
219 assert_eq!(simple_name(1), "holodeck:1");
220 }
221
222 #[test]
223 fn test_parse_pe_roundtrip() {
224 let r1 = make_truth("chr1", 10000, true, 0, 2);
225 let r2 = make_truth("chr1", 10450, false, 0, 1);
226 let name = encoded_pe_name(42, &r1, &r2);
227
228 let (num, parsed_r1, parsed_r2) = parse_encoded_pe_name(&name).unwrap();
229 assert_eq!(num, 42);
230 assert_eq!(parsed_r1, r1);
231 assert_eq!(parsed_r2, r2);
232 }
233
234 #[test]
235 fn test_parse_se_roundtrip() {
236 let r1 = make_truth("chrX", 500, false, 1, 0);
237 let name = encoded_se_name(99, &r1);
238
239 let (num, parsed) = parse_encoded_se_name(&name).unwrap();
240 assert_eq!(num, 99);
241 assert_eq!(parsed, r1);
242 }
243
244 #[test]
245 fn test_parse_pe_with_colon_in_contig() {
246 let r1 = make_truth("HLA-A*01:01:01:01", 100, true, 0, 0);
248 let r2 = make_truth("HLA-A*01:01:01:01", 400, false, 0, 1);
249 let name = encoded_pe_name(1, &r1, &r2);
250
251 let (num, parsed_r1, parsed_r2) = parse_encoded_pe_name(&name).unwrap();
252 assert_eq!(num, 1);
253 assert_eq!(parsed_r1.contig, "HLA-A*01:01:01:01");
254 assert_eq!(parsed_r2.contig, "HLA-A*01:01:01:01");
255 assert_eq!(parsed_r1.position, 100);
256 assert_eq!(parsed_r2.position, 400);
257 }
258
259 #[test]
260 fn test_parse_se_with_colon_in_contig() {
261 let r1 = make_truth("HLA-B*07:02", 50, false, 1, 3);
262 let name = encoded_se_name(5, &r1);
263
264 let (num, parsed) = parse_encoded_se_name(&name).unwrap();
265 assert_eq!(num, 5);
266 assert_eq!(parsed, r1);
267 }
268
269 #[test]
270 fn test_cross_format_rejection() {
271 let se_name = encoded_se_name(1, &make_truth("chr1", 10, true, 0, 0));
273 assert!(parse_encoded_pe_name(&se_name).is_none());
274
275 let pe_name = encoded_pe_name(
277 1,
278 &make_truth("chr1", 10, true, 0, 0),
279 &make_truth("chr1", 20, false, 0, 0),
280 );
281 assert!(parse_encoded_se_name(&pe_name).is_none());
284 }
285
286 #[test]
287 fn test_parse_invalid_names() {
288 assert!(parse_encoded_pe_name("not_holodeck:1:chr1:10F:20R:0:0:0").is_none());
289 assert!(parse_encoded_pe_name("").is_none());
290 assert!(parse_encoded_se_name("holodeck:1").is_none());
291 }
292
293 #[test]
294 fn test_parse_pos_strand() {
295 assert_eq!(parse_pos_strand("10000F"), Some((10000, true)));
296 assert_eq!(parse_pos_strand("450R"), Some((450, false)));
297 assert_eq!(parse_pos_strand("0F"), Some((0, true)));
298 assert_eq!(parse_pos_strand("X"), None);
299 assert_eq!(parse_pos_strand(""), None);
300 }
301}