use crate::{Error, GetDNARestrictive};
use rust_htslib::bam;
use std::fs::File;
use std::io::Write as _;
use std::path::Path;
use url::Url;
pub fn nanalogue_bam_reader<J>(bam_path: &J) -> Result<bam::Reader, Error>
where
J: AsRef<Path> + ?Sized,
{
Ok(bam::Reader::from_path(bam_path)?)
}
pub fn nanalogue_bam_reader_from_stdin() -> Result<bam::Reader, Error> {
Ok(bam::Reader::from_stdin()?)
}
pub fn nanalogue_bam_reader_from_url(url: &Url) -> Result<bam::Reader, Error> {
crate::init_ssl_certificates();
Ok(bam::Reader::from_url(url)?)
}
pub fn nanalogue_indexed_bam_reader<J>(
bam_path: &J,
fetch_definition: bam::FetchDefinition,
) -> Result<bam::IndexedReader, Error>
where
J: AsRef<Path> + ?Sized,
{
let mut bam_reader = bam::IndexedReader::from_path(bam_path)?;
bam_reader.fetch(fetch_definition)?;
Ok(bam_reader)
}
pub fn nanalogue_indexed_bam_reader_from_url(
url: &Url,
fetch_definition: bam::FetchDefinition,
) -> Result<bam::IndexedReader, Error> {
crate::init_ssl_certificates();
let mut bam_reader = bam::IndexedReader::from_url(url)?;
bam_reader.fetch(fetch_definition)?;
Ok(bam_reader)
}
pub fn write_fasta<I, J, K>(sequences: I, output_path: &J) -> Result<(), Error>
where
I: IntoIterator<Item = (String, K)>,
J: AsRef<Path> + ?Sized,
K: GetDNARestrictive,
{
let mut file = File::create(output_path)?;
for seq in sequences {
writeln!(file, ">{}", seq.0)?;
file.write_all(seq.1.get_dna_restrictive().get())?;
writeln!(file)?;
}
Ok(())
}
pub fn write_bam_denovo<I, J, K, L, M>(
reads: I,
contigs: J,
read_groups: K,
comments: L,
output_path: &M,
) -> Result<(), Error>
where
I: IntoIterator<Item = bam::Record>,
J: IntoIterator<Item = (String, usize)>,
K: IntoIterator<Item = String>,
L: IntoIterator<Item = String>,
M: AsRef<Path> + ?Sized,
{
let header = {
let mut header = bam::Header::new();
for k in contigs {
let _: &mut _ = header.push_record(
bam::header::HeaderRecord::new(b"SQ")
.push_tag(b"SN", k.0)
.push_tag(b"LN", k.1),
);
}
for k in read_groups {
let _: &mut _ = header.push_record(
bam::header::HeaderRecord::new(b"RG")
.push_tag(b"ID", k)
.push_tag(b"PL", "ONT")
.push_tag(b"LB", "blank")
.push_tag(b"SM", "blank")
.push_tag(b"PU", "blank"),
);
}
for k in comments {
let _: &mut _ = header.push_comment(k.as_bytes());
}
header
};
let mut writer = bam::Writer::from_path(output_path, &header, bam::Format::Bam)?;
let mut curr_read_key: (bool, i32, i64, bool);
let mut prev_read_key: (bool, i32, i64, bool) = (false, -1, -1, false);
for read in reads {
curr_read_key = (
read.is_unmapped(),
read.tid(),
read.pos(),
read.is_reverse(),
);
if prev_read_key > curr_read_key {
return Err(Error::InvalidSorting(
"reads input to write bam denovo have not been sorted properly".to_string(),
));
}
prev_read_key = curr_read_key;
writer.write(&read)?;
}
drop(writer);
bam::index::build(output_path, None, bam::index::Type::Bai, 2)?;
Ok(())
}
#[expect(clippy::panic, reason = "panic on error is standard practice in tests")]
#[cfg(test)]
mod tests {
use super::*;
use crate::DNARestrictive;
use rust_htslib::bam::Read as _;
use std::str::FromStr as _;
use uuid::Uuid;
#[test]
fn write_fasta_works() {
let contigs = vec![
(
"test_contig_0".to_string(),
DNARestrictive::from_str("ACGT").expect("no error"),
),
(
"test_contig_1".to_string(),
DNARestrictive::from_str("TGCA").expect("no error"),
),
];
let temp_path = std::env::temp_dir().join(format!("{}.fa", Uuid::new_v4()));
write_fasta(contigs, &temp_path).expect("no error");
let content = std::fs::read_to_string(&temp_path).expect("no error");
assert_eq!(content, ">test_contig_0\nACGT\n>test_contig_1\nTGCA\n");
std::fs::remove_file(&temp_path).expect("no error");
}
#[test]
fn nanalogue_bam_reader_valid_path() {
let mut reader = match nanalogue_bam_reader("examples/example_1.bam") {
Ok(r) => r,
Err(e) => panic!("Failed to read BAM file: {e:?}"),
};
assert_eq!(reader.records().count(), 4);
}
#[test]
fn nanalogue_bam_reader_invalid_path() {
let result = nanalogue_bam_reader("nonexistent_file.bam");
let _err = result.unwrap_err();
}
#[test]
fn nanalogue_indexed_bam_reader_fetch_all() {
let mut reader =
match nanalogue_indexed_bam_reader("examples/example_1.bam", bam::FetchDefinition::All)
{
Ok(r) => r,
Err(e) => panic!("Failed to read indexed BAM file: {e:?}"),
};
assert_eq!(reader.records().count(), 4);
}
#[test]
fn nanalogue_indexed_bam_reader_fetch_contig() {
let mut reader = match nanalogue_indexed_bam_reader(
"examples/example_1.bam",
bam::FetchDefinition::String(b"dummyI"),
) {
Ok(r) => r,
Err(e) => panic!("Failed to read indexed BAM file: {e:?}"),
};
assert_eq!(reader.records().count(), 1);
}
#[test]
fn nanalogue_indexed_bam_reader_fetch_nonexistent_contig() {
let result = nanalogue_indexed_bam_reader(
"examples/example_1.bam",
bam::FetchDefinition::String(b"nonexistent_contig"),
);
let _err = result.unwrap_err();
}
#[test]
fn nanalogue_indexed_bam_reader_fetch_region_with_reads() {
let mut reader = match nanalogue_indexed_bam_reader(
"examples/example_1.bam",
bam::FetchDefinition::RegionString(b"dummyIII", 20, 30),
) {
Ok(r) => r,
Err(e) => panic!("Failed to read indexed BAM file: {e:?}"),
};
assert_eq!(reader.records().count(), 1);
}
#[test]
fn nanalogue_indexed_bam_reader_fetch_region_without_reads() {
let mut reader = match nanalogue_indexed_bam_reader(
"examples/example_1.bam",
bam::FetchDefinition::RegionString(b"dummyIII", 10, 20),
) {
Ok(r) => r,
Err(e) => panic!("Failed to read indexed BAM file: {e:?}"),
};
assert_eq!(reader.records().count(), 0);
}
#[test]
fn write_bam_denovo_empty_reads() {
let contigs = vec![("chr1".to_string(), 1000)];
let read_groups = vec!["rg1".to_string()];
let comments = vec!["test comment".to_string()];
let reads: Vec<bam::Record> = vec![];
let temp_path = std::env::temp_dir().join(format!("{}.bam", Uuid::new_v4()));
match write_bam_denovo(reads, contigs, read_groups, comments, &temp_path) {
Ok(()) => (),
Err(e) => panic!("Failed to write BAM file: {e:?}"),
}
let reader = nanalogue_bam_reader(temp_path.to_str().unwrap()).expect("no error");
assert_eq!(reader.header().target_count(), 1);
std::fs::remove_file(&temp_path).expect("no error");
std::fs::remove_file(format!("{}.bai", temp_path.display())).expect("no error");
}
#[test]
#[expect(
clippy::similar_names,
reason = "read1, read2, and reads are clear in this test context"
)]
fn write_bam_denovo_unsorted_reads_error() {
let contigs = vec![("chr1".to_string(), 1000), ("chr2".to_string(), 1000)];
let read_groups = vec!["rg1".to_string()];
let comments = vec![];
let mut read1 = bam::Record::new();
read1.set_tid(1);
read1.set_pos(100);
let mut read2 = bam::Record::new();
read2.set_tid(0);
read2.set_pos(50);
let reads = vec![read1, read2];
let temp_path = std::env::temp_dir().join(format!("{}.bam", Uuid::new_v4()));
let result = write_bam_denovo(reads, contigs, read_groups, comments, &temp_path);
let err = result.unwrap_err();
assert!(matches!(err, Error::InvalidSorting(_)));
drop(std::fs::remove_file(&temp_path));
drop(std::fs::remove_file(format!("{}.bai", temp_path.display())));
}
#[test]
#[expect(
clippy::similar_names,
reason = "read1, read2, and reads are clear in this test context"
)]
fn write_bam_denovo_sorted_reads_same_contig() {
let contigs = vec![("chr1".to_string(), 1000)];
let read_groups = vec!["rg1".to_string()];
let comments = vec![];
let mut read1 = bam::Record::new();
read1.set_tid(0);
read1.set_pos(50);
read1.set(b"read1", None, b"ACGT", &[30, 30, 30, 30]);
let mut read2 = bam::Record::new();
read2.set_tid(0);
read2.set_pos(100);
read2.set(b"read2", None, b"TGCA", &[30, 30, 30, 30]);
let reads = vec![read1, read2];
let temp_path = std::env::temp_dir().join(format!("{}.bam", Uuid::new_v4()));
match write_bam_denovo(reads, contigs, read_groups, comments, &temp_path) {
Ok(()) => (),
Err(e) => panic!("Failed to write BAM file: {e:?}"),
}
let mut reader = nanalogue_bam_reader(temp_path.to_str().unwrap()).expect("no error");
assert_eq!(reader.records().count(), 2);
std::fs::remove_file(&temp_path).expect("no error");
std::fs::remove_file(format!("{}.bai", temp_path.display())).expect("no error");
}
#[test]
fn write_bam_denovo_multiple_contigs_and_read_groups() {
let contigs = vec![
("chr1".to_string(), 1000),
("chr2".to_string(), 2000),
("chr3".to_string(), 1500),
];
let read_groups = vec!["rg1".to_string(), "rg2".to_string()];
let comments = vec![
"comment1".to_string(),
"comment2".to_string(),
"comment3".to_string(),
];
let reads: Vec<bam::Record> = vec![];
let temp_path = std::env::temp_dir().join(format!("{}.bam", Uuid::new_v4()));
match write_bam_denovo(reads, contigs, read_groups, comments, &temp_path) {
Ok(()) => (),
Err(e) => panic!("Failed to write BAM file: {e:?}"),
}
let reader = nanalogue_bam_reader(temp_path.to_str().unwrap()).expect("no error");
assert_eq!(reader.header().target_count(), 3);
let header = reader.header();
let header_text = std::str::from_utf8(header.as_bytes()).expect("no error");
assert!(header_text.contains("@RG\tID:rg1"));
assert!(header_text.contains("@RG\tID:rg2"));
std::fs::remove_file(&temp_path).expect("no error");
std::fs::remove_file(format!("{}.bai", temp_path.display())).expect("no error");
}
#[test]
fn write_bam_denovo_with_unmapped_reads() {
let contigs = vec![("chr1".to_string(), 1000)];
let read_groups = vec!["rg1".to_string()];
let comments = vec![];
let mut read = bam::Record::new();
read.set(b"unmapped_read", None, b"ACGT", &[30, 30, 30, 30]);
read.set_unmapped();
let reads = vec![read];
let temp_path = std::env::temp_dir().join(format!("{}.bam", Uuid::new_v4()));
match write_bam_denovo(reads, contigs, read_groups, comments, &temp_path) {
Ok(()) => (),
Err(e) => panic!("Failed to write BAM file: {e:?}"),
}
let mut reader = nanalogue_bam_reader(temp_path.to_str().unwrap()).expect("no error");
let records: Vec<_> = reader
.records()
.collect::<Result<Vec<_>, _>>()
.expect("no error");
assert_eq!(records.len(), 1);
assert!(records.first().expect("record exists").is_unmapped());
std::fs::remove_file(&temp_path).expect("no error");
std::fs::remove_file(format!("{}.bai", temp_path.display())).expect("no error");
}
}