use anyhow::Result;
use bstr::BString;
use noodles::sam::Header;
use noodles::sam::header::record::value::Map;
use noodles::sam::header::record::value::map::Program;
use noodles::sam::header::record::value::map::program::tag;
use std::collections::HashSet;
/// Returns the ID of the program record that terminates a program chain.
///
/// A program counts as a chain end when no other program in `header` names it
/// in a `PREVIOUS_PROGRAM_ID` field. Programs are scanned in the header's
/// iteration order and the first unreferenced ID wins. If every program is
/// referenced by some other program, the first program ID is returned instead.
///
/// Returns `None` when the header holds no program records. IDs that are not
/// valid UTF-8 are converted lossily.
#[must_use]
pub fn get_last_program_id(header: &Header) -> Option<String> {
    let programs = header.programs();
    let by_id = programs.as_ref();

    // Every ID that some other program points back to as its predecessor.
    let parents: HashSet<&[u8]> = by_id
        .values()
        .filter_map(|program| program.other_fields().get(&tag::PREVIOUS_PROGRAM_ID))
        .map(AsRef::<[u8]>::as_ref)
        .collect();

    // An empty map produces `None` from both lookups, so no separate guard is needed.
    by_id
        .keys()
        .find(|&id| !parents.contains(id.as_slice()))
        .or_else(|| by_id.keys().next())
        .map(|id| String::from_utf8_lossy(id).into_owned())
}
/// Produces a program ID based on `base_id` that is not yet present in `header`.
///
/// If `base_id` itself is free it is returned unchanged. Otherwise the smallest
/// positive integer suffix `N` for which `"{base_id}.{N}"` is unused is chosen
/// (`fgumi.1`, `fgumi.2`, ...).
///
/// The returned ID is always absent from `header`, so it can be added without
/// triggering a duplicate-ID error.
#[must_use]
pub fn make_unique_program_id(header: &Header, base_id: &str) -> String {
    let programs = header.programs();
    let program_map = programs.as_ref();
    if !program_map.contains_key(base_id.as_bytes()) {
        return base_id.to_string();
    }

    // The search is deliberately unbounded: the header holds finitely many
    // IDs, so a free suffix always exists. Capping the search and falling back
    // to an unchecked value (e.g. the process ID) could return an ID that is
    // already taken.
    let mut suffix: u64 = 1;
    loop {
        let candidate = format!("{base_id}.{suffix}");
        if !program_map.contains_key(candidate.as_bytes()) {
            return candidate;
        }
        suffix += 1;
    }
}
/// Builds a program record describing an `fgumi` invocation.
///
/// The record always carries the name `"fgumi"`, the supplied `version`, and
/// the supplied `command_line`. When `previous_program` is `Some`, its value is
/// stored in the `PREVIOUS_PROGRAM_ID` field; otherwise that field is omitted.
///
/// # Errors
///
/// Returns an error if the underlying map builder fails to build the record.
pub fn build_program_record(
    version: &str,
    command_line: &str,
    previous_program: Option<&str>,
) -> Result<Map<Program>> {
    let base = Map::<Program>::builder()
        .insert(tag::NAME, "fgumi")
        .insert(tag::VERSION, version)
        .insert(tag::COMMAND_LINE, command_line);

    // Only link to a predecessor when the caller supplied one.
    let complete = match previous_program {
        Some(parent) => base.insert(tag::PREVIOUS_PROGRAM_ID, parent),
        None => base,
    };

    let record = complete.build()?;
    Ok(record)
}
/// Adds an `fgumi` program record to `header` and returns the updated header.
///
/// The new record is linked to the ID reported by [`get_last_program_id`]
/// (if any), and is stored under an ID obtained from
/// [`make_unique_program_id`] with the base `"fgumi"`.
///
/// # Errors
///
/// Returns an error if the record cannot be built or cannot be added to the
/// header's program collection.
pub fn add_pg_record(mut header: Header, version: &str, command_line: &str) -> Result<Header> {
    // Both lookups inspect the header before it is modified.
    let parent_id = get_last_program_id(&header);
    let record_id = make_unique_program_id(&header, "fgumi");

    let record = build_program_record(version, command_line, parent_id.as_deref())?;
    header.programs_mut().add(BString::from(record_id), record)?;

    Ok(header)
}
/// Adds an `fgumi` program record to a header builder.
///
/// The record is registered under the fixed ID `"fgumi"` and carries no
/// `PREVIOUS_PROGRAM_ID`; no attempt is made to inspect programs already
/// present in `builder`.
///
/// # Errors
///
/// Returns an error if the program record cannot be built.
pub fn add_pg_to_builder(
    builder: noodles::sam::header::Builder,
    version: &str,
    command_line: &str,
) -> Result<noodles::sam::header::Builder> {
    build_program_record(version, command_line, None)
        .map(|record| builder.add_program("fgumi", record))
}
// Unit tests for the program-record helpers defined in this file:
// `get_last_program_id`, `make_unique_program_id`, `add_pg_record`, and
// `add_pg_to_builder`.
#[cfg(test)]
mod tests {
use super::*;
// A default header has no program records, so there is no last program.
#[test]
fn test_get_last_program_id_empty() {
let header = Header::default();
assert_eq!(get_last_program_id(&header), None);
}
// With a single program, that program is the last one.
#[test]
fn test_get_last_program_id_single() {
let mut header = Header::default();
let pg = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("bwa"), pg)
.expect("adding program to header should succeed");
assert_eq!(get_last_program_id(&header), Some("bwa".to_string()));
}
// "samtools" names "bwa" as its previous program, so "bwa" is referenced
// and "samtools" is reported as the end of the chain.
#[test]
fn test_get_last_program_id_chained() {
let mut header = Header::default();
let pg1 = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("bwa"), pg1)
.expect("adding program to header should succeed");
let pg2 = Map::<Program>::builder()
.insert(tag::PREVIOUS_PROGRAM_ID, "bwa")
.build()
.expect("build should succeed");
header
.programs_mut()
.add(BString::from("samtools"), pg2)
.expect("adding program to header should succeed");
assert_eq!(get_last_program_id(&header), Some("samtools".to_string()));
}
// When the base ID is unused it is returned unchanged.
#[test]
fn test_make_unique_program_id_no_collision() {
let header = Header::default();
assert_eq!(make_unique_program_id(&header, "fgumi"), "fgumi");
}
// When the base ID is taken, the first numeric suffix is appended.
#[test]
fn test_make_unique_program_id_with_collision() {
let mut header = Header::default();
let pg = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("fgumi"), pg)
.expect("adding program to header should succeed");
assert_eq!(make_unique_program_id(&header, "fgumi"), "fgumi.1");
}
// When both "fgumi" and "fgumi.1" are taken, the next suffix is used.
#[test]
fn test_make_unique_program_id_multiple_collisions() {
let mut header = Header::default();
let pg1 = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("fgumi"), pg1)
.expect("adding program to header should succeed");
let pg2 = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("fgumi.1"), pg2)
.expect("adding program to header should succeed");
assert_eq!(make_unique_program_id(&header, "fgumi"), "fgumi.2");
}
// Adding to an empty header creates a single "fgumi" record carrying the
// name, version, and command line, with no previous-program link.
#[test]
fn test_add_pg_record_empty_header() {
let header = Header::default();
let result =
add_pg_record(header, "1.0.0", "fgumi test").expect("add_pg_record should succeed");
let programs = result.programs();
assert_eq!(programs.as_ref().len(), 1);
assert!(programs.as_ref().contains_key(b"fgumi".as_slice()));
let pg =
programs.as_ref().get(b"fgumi".as_slice()).expect("expected key should be present");
assert_eq!(
pg.other_fields().get(&tag::NAME).map(std::convert::AsRef::as_ref),
Some(b"fgumi".as_slice())
);
assert_eq!(
pg.other_fields().get(&tag::VERSION).map(std::convert::AsRef::as_ref),
Some(b"1.0.0".as_slice())
);
assert_eq!(
pg.other_fields().get(&tag::COMMAND_LINE).map(std::convert::AsRef::as_ref),
Some(b"fgumi test".as_slice())
);
assert!(pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).is_none());
}
// If "fgumi" already exists, the new record is stored as "fgumi.1" and
// points back to the existing "fgumi" record.
#[test]
fn test_add_pg_record_with_existing_fgumi() {
let mut header = Header::default();
let pg = Map::<Program>::default();
header
.programs_mut()
.add(BString::from("fgumi"), pg)
.expect("adding program to header should succeed");
let result =
add_pg_record(header, "1.0.0", "fgumi test2").expect("add_pg_record should succeed");
let programs = result.programs();
assert_eq!(programs.as_ref().len(), 2);
assert!(programs.as_ref().contains_key(b"fgumi.1".as_slice()));
let pg =
programs.as_ref().get(b"fgumi.1".as_slice()).expect("expected key should be present");
assert_eq!(
pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref),
Some(b"fgumi".as_slice())
);
}
// The new record links to an existing program from a different tool.
#[test]
fn test_add_pg_record_chains_to_non_fgumi() {
let mut header = Header::default();
let bwa_pg = Map::<Program>::builder()
.insert(tag::NAME, "bwa")
.insert(tag::VERSION, "0.7.17")
.build()
.expect("building program map should succeed");
header
.programs_mut()
.add(BString::from("bwa"), bwa_pg)
.expect("adding program to header should succeed");
let result = add_pg_record(header, "1.0.0", "fgumi group -i in.bam")
.expect("add_pg_record should succeed");
let programs = result.programs();
let pg =
programs.as_ref().get(b"fgumi".as_slice()).expect("expected key should be present");
assert_eq!(
pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref),
Some(b"bwa".as_slice())
);
}
// The builder variant registers a single "fgumi" record without a
// previous-program link.
#[test]
fn test_add_pg_to_builder() {
let builder = Header::builder();
let builder = add_pg_to_builder(builder, "1.0.0", "fgumi extract")
.expect("add_pg_to_builder should succeed");
let header = builder.build();
let programs = header.programs();
assert_eq!(programs.as_ref().len(), 1);
let pg =
programs.as_ref().get(b"fgumi".as_slice()).expect("expected key should be present");
assert_eq!(
pg.other_fields().get(&tag::NAME).map(std::convert::AsRef::as_ref),
Some(b"fgumi".as_slice())
);
assert!(pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).is_none());
}
// An empty command line is accepted and still produces an "fgumi" record.
#[test]
fn test_add_pg_record_empty_command_line() {
let header = Header::default();
let result = add_pg_record(header, "1.0.0", "").expect("add_pg_record should succeed");
let programs = result.programs();
assert_eq!(programs.as_ref().len(), 1);
assert!(programs.as_ref().contains_key(b"fgumi".as_slice()));
}
// A header produced by `add_pg_record` can be handed to the BAM writer
// factory without error.
// NOTE(review): the trailing `1, 6` arguments are presumably thread count
// and compression level — confirm against `create_bam_writer`.
#[test]
fn test_add_pg_record_write_to_bam() {
use crate::bam_io::create_bam_writer;
use tempfile::TempDir;
let dir = TempDir::new().expect("creating temp file/dir should succeed");
let output_path = dir.path().join("test.bam");
let header = Header::default();
let result =
add_pg_record(header, "1.0.0", "fgumi test").expect("add_pg_record should succeed");
let _writer = create_bam_writer(&output_path, &result, 1, 6)
.expect("creating BAM writer should succeed");
}
// Chaining onto a program record that has no fields of its own: the new
// record links to it by ID, and the resulting header is still accepted by
// the BAM writer factory.
#[test]
fn test_add_pg_record_chains_to_empty_program() {
use crate::bam_io::create_bam_writer;
use tempfile::TempDir;
let pg_map = Map::<Program>::default();
let header = Header::builder().add_program("SamBuilder", pg_map).build();
let result =
add_pg_record(header, "1.0.0", "fgumi test").expect("add_pg_record should succeed");
let programs = result.programs();
assert_eq!(programs.as_ref().len(), 2);
let pg =
programs.as_ref().get(b"fgumi".as_slice()).expect("expected key should be present");
assert_eq!(
pg.other_fields().get(&tag::PREVIOUS_PROGRAM_ID).map(std::convert::AsRef::as_ref),
Some(b"SamBuilder".as_slice())
);
let dir = TempDir::new().expect("creating temp file/dir should succeed");
let output_path = dir.path().join("test.bam");
let _writer = create_bam_writer(&output_path, &result, 1, 6)
.expect("creating BAM writer should succeed");
}
}