1use anyhow::Result;
7use bstr::BString;
8use noodles::sam::Header;
9use noodles::sam::header::record::value::Map;
10use noodles::sam::header::record::value::map::Program;
11use noodles::sam::header::record::value::map::program::tag;
12use std::collections::HashSet;
13
14#[must_use]
27pub fn get_last_program_id(header: &Header) -> Option<String> {
28 let programs = header.programs();
29 let program_map = programs.as_ref();
30
31 if program_map.is_empty() {
32 return None;
33 }
34
35 let mut referenced: HashSet<&[u8]> = HashSet::new();
37 for (_id, pg) in program_map {
38 if let Some(pp) = pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID) {
39 referenced.insert(pp.as_ref());
40 }
41 }
42
43 for (id, _pg) in program_map {
45 if !referenced.contains(id.as_slice()) {
46 return Some(String::from_utf8_lossy(id).to_string());
47 }
48 }
49
50 program_map.keys().next().map(|id| String::from_utf8_lossy(id).to_string())
52}
53
54#[must_use]
65pub fn make_unique_program_id(header: &Header, base_id: &str) -> String {
66 let programs = header.programs();
67 let program_map = programs.as_ref();
68
69 if !program_map.contains_key(base_id.as_bytes()) {
71 return base_id.to_string();
72 }
73
74 for i in 1..=1000 {
76 let candidate = format!("{base_id}.{i}");
77 if !program_map.contains_key(candidate.as_bytes()) {
78 return candidate;
79 }
80 }
81
82 format!("{base_id}.{}", std::process::id())
84}
85
86pub fn build_program_record(
101 version: &str,
102 command_line: &str,
103 previous_program: Option<&str>,
104) -> Result<Map<Program>> {
105 let mut builder = Map::<Program>::builder()
106 .insert(tag::NAME, "fgumi")
107 .insert(tag::VERSION, version)
108 .insert(tag::COMMAND_LINE, command_line);
109
110 if let Some(pp) = previous_program {
111 builder = builder.insert(tag::PREVIOUS_PROGRAM_ID, pp);
112 }
113
114 Ok(builder.build()?)
115}
116
117pub fn add_pg_record(mut header: Header, version: &str, command_line: &str) -> Result<Header> {
137 let previous_program = get_last_program_id(&header);
138 let unique_id = make_unique_program_id(&header, "fgumi");
139 let pg_record = build_program_record(version, command_line, previous_program.as_deref())?;
140
141 header.programs_mut().add(BString::from(unique_id), pg_record)?;
142
143 Ok(header)
144}
145
146pub fn add_pg_to_builder(
163 builder: noodles::sam::header::Builder,
164 version: &str,
165 command_line: &str,
166) -> Result<noodles::sam::header::Builder> {
167 let pg_record = build_program_record(version, command_line, None)?;
168 Ok(builder.add_program("fgumi", pg_record))
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_last_program_id_empty() {
        // No program records means there is no chain end to report.
        assert_eq!(get_last_program_id(&Header::default()), None);
    }

    #[test]
    fn test_get_last_program_id_single() {
        let mut hdr = Header::default();
        hdr.programs_mut().add(BString::from("bwa"), Map::<Program>::default()).unwrap();

        assert_eq!(get_last_program_id(&hdr).as_deref(), Some("bwa"));
    }

    #[test]
    fn test_get_last_program_id_chained() {
        let mut hdr = Header::default();

        // First record: no previous-program link.
        hdr.programs_mut().add(BString::from("bwa"), Map::<Program>::default()).unwrap();

        // Second record points back at "bwa".
        let child =
            Map::<Program>::builder().insert(tag::PREVIOUS_PROGRAM_ID, "bwa").build().unwrap();
        hdr.programs_mut().add(BString::from("samtools"), child).unwrap();

        // "samtools" is the only ID nobody references.
        assert_eq!(get_last_program_id(&hdr).as_deref(), Some("samtools"));
    }

    #[test]
    fn test_make_unique_program_id_no_collision() {
        let unique = make_unique_program_id(&Header::default(), "fgumi");
        assert_eq!(unique, "fgumi");
    }

    #[test]
    fn test_make_unique_program_id_with_collision() {
        let mut hdr = Header::default();
        hdr.programs_mut().add(BString::from("fgumi"), Map::<Program>::default()).unwrap();

        let unique = make_unique_program_id(&hdr, "fgumi");
        assert_eq!(unique, "fgumi.1");
    }

    #[test]
    fn test_make_unique_program_id_multiple_collisions() {
        let mut hdr = Header::default();
        hdr.programs_mut().add(BString::from("fgumi"), Map::<Program>::default()).unwrap();
        hdr.programs_mut().add(BString::from("fgumi.1"), Map::<Program>::default()).unwrap();

        let unique = make_unique_program_id(&hdr, "fgumi");
        assert_eq!(unique, "fgumi.2");
    }

    #[test]
    fn test_add_pg_record_empty_header() {
        let updated = add_pg_record(Header::default(), "1.0.0", "fgumi test").unwrap();
        let programs = updated.programs();
        let by_id = programs.as_ref();

        assert_eq!(by_id.len(), 1);
        assert!(by_id.contains_key(b"fgumi".as_slice()));

        let record = by_id.get(b"fgumi".as_slice()).unwrap();
        let fields = record.other_fields();

        // Name, version and command line must all be carried over verbatim.
        let expected_fields = [
            (tag::NAME, b"fgumi".as_slice()),
            (tag::VERSION, b"1.0.0".as_slice()),
            (tag::COMMAND_LINE, b"fgumi test".as_slice()),
        ];
        for (key, expected) in expected_fields {
            assert_eq!(fields.get(&key).map(std::convert::AsRef::as_ref), Some(expected));
        }

        // With nothing to chain to, no previous-program link is written.
        assert!(fields.get(&tag::PREVIOUS_PROGRAM_ID).is_none());
    }

    #[test]
    fn test_add_pg_record_with_existing_fgumi() {
        let mut hdr = Header::default();
        hdr.programs_mut().add(BString::from("fgumi"), Map::<Program>::default()).unwrap();

        let updated = add_pg_record(hdr, "1.0.0", "fgumi test2").unwrap();
        let programs = updated.programs();
        let by_id = programs.as_ref();

        assert_eq!(by_id.len(), 2);
        assert!(by_id.contains_key(b"fgumi.1".as_slice()));

        // The new record gets a suffixed ID and links back to the existing "fgumi".
        let record = by_id.get(b"fgumi.1".as_slice()).unwrap();
        let parent =
            record.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref);
        assert_eq!(parent, Some(b"fgumi".as_slice()));
    }

    #[test]
    fn test_add_pg_record_chains_to_non_fgumi() {
        let mut hdr = Header::default();

        let aligner = Map::<Program>::builder()
            .insert(tag::NAME, "bwa")
            .insert(tag::VERSION, "0.7.17")
            .build()
            .unwrap();
        hdr.programs_mut().add(BString::from("bwa"), aligner).unwrap();

        let updated = add_pg_record(hdr, "1.0.0", "fgumi group -i in.bam").unwrap();
        let programs = updated.programs();

        // The fgumi record must link back to the pre-existing "bwa" record.
        let record = programs.as_ref().get(b"fgumi".as_slice()).unwrap();
        let parent =
            record.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref);
        assert_eq!(parent, Some(b"bwa".as_slice()));
    }

    #[test]
    fn test_add_pg_to_builder() {
        let built = add_pg_to_builder(Header::builder(), "1.0.0", "fgumi extract")
            .unwrap()
            .build();

        let programs = built.programs();
        assert_eq!(programs.as_ref().len(), 1);

        let record = programs.as_ref().get(b"fgumi".as_slice()).unwrap();
        let fields = record.other_fields();
        assert_eq!(
            fields.get(&tag::NAME).map(std::convert::AsRef::as_ref),
            Some(b"fgumi".as_slice())
        );
        // The builder variant never sets a previous-program link.
        assert!(fields.get(&tag::PREVIOUS_PROGRAM_ID).is_none());
    }

    #[test]
    fn test_add_pg_record_empty_command_line() {
        let updated = add_pg_record(Header::default(), "1.0.0", "").unwrap();
        let programs = updated.programs();
        let by_id = programs.as_ref();

        assert_eq!(by_id.len(), 1);
        assert!(by_id.contains_key(b"fgumi".as_slice()));
    }

    #[test]
    fn test_add_pg_record_write_to_bam() {
        use crate::bam_io::create_bam_writer;
        use tempfile::TempDir;

        let tmp = TempDir::new().unwrap();
        let bam_path = tmp.path().join("test.bam");

        let with_pg = add_pg_record(Header::default(), "1.0.0", "fgumi test").unwrap();

        // Creating the writer must succeed with the augmented header.
        let _writer = create_bam_writer(&bam_path, &with_pg, 1, 6).unwrap();
    }

    #[test]
    fn test_add_pg_record_chains_to_empty_program() {
        use crate::bam_io::create_bam_writer;
        use tempfile::TempDir;

        // A pre-existing program record that carries no fields at all.
        let bare = Map::<Program>::default();
        let hdr = Header::builder().add_program("SamBuilder", bare).build();

        let with_pg = add_pg_record(hdr, "1.0.0", "fgumi test").unwrap();
        let programs = with_pg.programs();
        assert_eq!(programs.as_ref().len(), 2);

        let record = programs.as_ref().get(b"fgumi".as_slice()).unwrap();
        let parent =
            record.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref);
        assert_eq!(parent, Some(b"SamBuilder".as_slice()));

        // Creating the writer must also succeed for the chained header.
        let tmp = TempDir::new().unwrap();
        let bam_path = tmp.path().join("test.bam");
        let _writer = create_bam_writer(&bam_path, &with_pg, 1, 6).unwrap();
    }
}