use std::path::Path;
use anyhow::{Context, Result};
use bstr::BString;
use noodles::sam::Header;
use noodles::sam::header::record::value::Map;
use noodles::sam::header::record::value::map::header::tag as header_tag;
use tempfile::TempDir;
pub use fgumi_raw_bam as bam_fields;
pub mod bgzf_io;
pub mod inline_buffer;
pub mod keys;
pub mod loser_tree;
pub(crate) mod memory_probe;
pub mod pipeline;
pub mod pooled_bam_writer;
pub mod pooled_chunk_writer;
pub mod radix;
pub mod raw;
pub mod raw_bam_reader;
pub mod read_ahead;
pub(crate) mod segmented_buf;
pub mod tmp_dir_alloc;
pub mod worker_pool;
/// Size of a merge buffer: 64 KiB (2^16).
///
/// NOTE(review): the unit is presumably bytes and the consumers presumably live
/// in the submodules — neither is visible from this part of the file; confirm.
const MERGE_BUFFER_SIZE: usize = 1 << 16;
/// Counters describing a sort run.
///
/// Every counter starts at zero via [`Default`].
#[derive(Debug, Default)]
pub struct SortStats {
    /// Total record count (presumably records read as input — confirm against
    /// the code that populates it).
    pub total_records: u64,
    /// Output record count (presumably records written to the sorted output —
    /// confirm against the code that populates it).
    pub output_records: u64,
    /// Number of chunks written (presumably temporary spill chunks — confirm).
    pub chunks_written: usize,
}
pub(crate) fn create_output_header(sort_order: keys::SortOrder, header: &Header) -> Header {
let mut builder = Header::builder();
for (name, seq) in header.reference_sequences() {
builder = builder.add_reference_sequence(name.as_slice(), seq.clone());
}
for (id, rg) in header.read_groups() {
builder = builder.add_read_group(id.as_slice(), rg.clone());
}
for (id, pg) in header.programs().as_ref() {
builder = builder.add_program(id.as_slice(), pg.clone());
}
for comment in header.comments() {
builder = builder.add_comment(comment.clone());
}
let mut hd = header.header().cloned().unwrap_or_else(|| {
Map::<noodles::sam::header::record::value::map::Header>::builder()
.build()
.expect("valid default header")
});
hd.other_fields_mut().swap_remove(&header_tag::SORT_ORDER);
hd.other_fields_mut().swap_remove(&header_tag::GROUP_ORDER);
hd.other_fields_mut().swap_remove(&header_tag::SUBSORT_ORDER);
match sort_order {
keys::SortOrder::Coordinate => {
hd.other_fields_mut().insert(header_tag::SORT_ORDER, BString::from("coordinate"));
}
keys::SortOrder::Queryname(_) => {
hd.other_fields_mut().insert(header_tag::SORT_ORDER, BString::from("queryname"));
if let Some(ss) = sort_order.header_ss_tag() {
hd.other_fields_mut().insert(header_tag::SUBSORT_ORDER, BString::from(ss));
}
}
keys::SortOrder::TemplateCoordinate => {
hd.other_fields_mut().insert(header_tag::SORT_ORDER, BString::from("unsorted"));
hd.other_fields_mut().insert(header_tag::GROUP_ORDER, BString::from("query"));
hd.other_fields_mut()
.insert(header_tag::SUBSORT_ORDER, BString::from("template-coordinate"));
}
}
builder = builder.set_header(hd);
builder.build()
}
fn create_temp_dir(base: Option<&Path>) -> Result<TempDir> {
match base {
Some(base) => {
std::fs::create_dir_all(base)?;
TempDir::new_in(base).context("Failed to create temp directory")
}
None => TempDir::new().context("Failed to create temp directory"),
}
}
pub use inline_buffer::{TemplateKey, extract_coordinate_key_inline};
pub use keys::{
QuerynameComparator, RawCoordinateKey, RawQuerynameKey, RawQuerynameLexKey, RawSortKey,
SortContext, SortOrder, natural_compare, normalize_natural_key,
};
pub use pipeline::{ParallelMergeConfig, parallel_merge, parallel_merge_buffered};
pub use raw::{LibraryLookup, RawExternalSorter, cb_hasher, extract_template_key_inline};
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `@HD` record map type.
    type HdMap = Map<noodles::sam::header::record::value::map::Header>;

    /// Returns the raw bytes of a non-version `@HD` tag, or `None` if absent.
    fn tag_value<'a>(hd: &'a HdMap, tag: &[u8; 2]) -> Option<&'a [u8]> {
        hd.other_fields().get(tag).map(|value| <_ as AsRef<[u8]>>::as_ref(value))
    }

    #[test]
    fn test_create_temp_dir_default() {
        let dir = create_temp_dir(None).expect("creating temp dir should succeed");
        assert!(dir.path().exists());
    }

    #[test]
    fn test_create_temp_dir_with_base() {
        let base = tempfile::tempdir().expect("creating temp file/dir should succeed");
        // The base does not exist yet; `create_temp_dir` must create it.
        let subdir = base.path().join("sort_spill");
        let dir = create_temp_dir(Some(&subdir)).expect("creating temp dir should succeed");
        assert!(dir.path().exists());
        assert!(dir.path().starts_with(&subdir));
    }

    #[test]
    fn test_sort_stats_default() {
        let stats = SortStats::default();
        assert_eq!(stats.total_records, 0);
        assert_eq!(stats.output_records, 0);
        assert_eq!(stats.chunks_written, 0);
    }

    #[test]
    fn test_create_output_header_preserves_vn() {
        let hd = HdMap::new(noodles::sam::header::record::value::map::header::Version::new(1, 6));
        let header = Header::builder().set_header(hd).build();
        let output = create_output_header(keys::SortOrder::Coordinate, &header);
        let hd = output.header().expect("should have @HD");
        assert_eq!(
            hd.version(),
            noodles::sam::header::record::value::map::header::Version::new(1, 6)
        );
        assert_eq!(tag_value(hd, b"SO"), Some(&b"coordinate"[..]));
    }

    #[test]
    fn test_create_output_header_clears_stale_sort_tags() {
        let hd = HdMap::builder()
            .insert(header_tag::SORT_ORDER, BString::from("unsorted"))
            .insert(header_tag::GROUP_ORDER, BString::from("query"))
            .insert(header_tag::SUBSORT_ORDER, BString::from("template-coordinate"))
            .build()
            .expect("valid header");
        let header = Header::builder().set_header(hd).build();
        let output = create_output_header(keys::SortOrder::Coordinate, &header);
        let hd = output.header().expect("should have @HD");
        assert_eq!(tag_value(hd, b"SO"), Some(&b"coordinate"[..]));
        assert!(tag_value(hd, b"GO").is_none(), "GO should be cleared");
        assert!(tag_value(hd, b"SS").is_none(), "SS should be cleared");
    }

    #[test]
    fn test_create_output_header_no_existing_hd() {
        let header = Header::builder().build();
        let output = create_output_header(keys::SortOrder::Coordinate, &header);
        let hd = output.header().expect("should have @HD");
        assert_eq!(tag_value(hd, b"SO"), Some(&b"coordinate"[..]));
    }

    #[test]
    fn test_create_output_header_template_coordinate() {
        // Template-coordinate is the only order that writes all three tags.
        let header = Header::builder().add_comment(BString::from("kept comment")).build();
        let output = create_output_header(keys::SortOrder::TemplateCoordinate, &header);
        let hd = output.header().expect("should have @HD");
        assert_eq!(tag_value(hd, b"SO"), Some(&b"unsorted"[..]));
        assert_eq!(tag_value(hd, b"GO"), Some(&b"query"[..]));
        assert_eq!(tag_value(hd, b"SS"), Some(&b"template-coordinate"[..]));
        // Non-@HD content must be carried over untouched.
        assert_eq!(output.comments(), header.comments());
    }
}