#![allow(dead_code)]
use std::{
collections::VecDeque,
io::{BufRead, BufReader, Read},
ops::Deref,
path::{Path, PathBuf},
sync::{Once, OnceLock},
};
use flate2::read::GzDecoder;
use log::Level;
/// Expands to a `&'static str` naming the function the macro is invoked in.
///
/// A zero-sized marker function is declared at the call site; its type name
/// (as reported by [`std::any::type_name`]) is the enclosing item's path
/// followed by the marker's own name, so stripping that final segment leaves
/// the path of the enclosing function.
#[macro_export]
macro_rules! current_func {
    () => {{
        // Marker item: only its type name is of interest.
        fn here() {}
        fn path_of<T>(_: T) -> &'static str {
            std::any::type_name::<T>()
        }
        path_of(here).strip_suffix("::here").unwrap()
    }};
}
/// A path inside the system temporary directory that can optionally be
/// deleted when the value is dropped.
///
/// Constructing a `TempFile` does not create anything on disk; it only
/// computes the path. Dereferences to the underlying [`PathBuf`].
pub struct TempFile {
    /// Location of the file: `std::env::temp_dir()` joined with the given name.
    path: PathBuf,
    /// Whether `Drop` should try to remove the file.
    cleanup: bool,
}

impl TempFile {
    /// Builds the path `<temp dir>/<name>`.
    ///
    /// When `cleanup` is `true` the file is removed on drop (with a warning on
    /// stderr if it is missing or cannot be removed).
    pub fn new(name: &str, cleanup: bool) -> Self {
        Self {
            path: std::env::temp_dir().join(name),
            cleanup,
        }
    }
}

impl Deref for TempFile {
    type Target = PathBuf;

    /// Exposes the wrapped path.
    fn deref(&self) -> &Self::Target {
        &self.path
    }
}

impl Drop for TempFile {
    /// Removes the file if cleanup was requested; never panics, only warns.
    fn drop(&mut self) {
        if !self.cleanup {
            return;
        }
        if !self.path.exists() {
            eprintln!("Warning: Temporary file {:?} does not exist.", self.path);
            return;
        }
        match std::fs::remove_file(&self.path) {
            Ok(()) => {}
            Err(err) => eprintln!(
                "Warning: Failed to remove temporary file {:?}: {:?}",
                self.path, err
            ),
        }
    }
}
/// A [`Read`] adapter that filters a line-oriented, tab-separated byte stream.
///
/// A line is passed through when it starts with `#` or when the bytes before
/// its first tab are exactly equal to one of the entries in `chrom`; every
/// other line is dropped. Lines are forwarded unmodified, including their
/// trailing newline (if any).
pub struct OnlyChromosomes<R> {
    /// Buffered source, consumed one line at a time.
    inner: BufReader<R>,
    /// Bytes of the most recently accepted line that have not yet been handed
    /// to the caller. `None` until the first line has been accepted.
    buf: Option<VecDeque<u8>>,
    /// First-column values whose lines are kept.
    chrom: &'static [&'static str],
}

impl<R: Read> OnlyChromosomes<R> {
    /// Wraps `inner`, keeping only comment lines and lines whose first
    /// tab-separated field is listed in `chrom`.
    pub fn new(inner: R, chrom: &'static [&'static str]) -> Self {
        Self {
            inner: BufReader::new(inner),
            buf: None,
            chrom,
        }
    }

    /// Reads lines from the source until one is accepted, then stores it as
    /// the pending output, replacing whatever was pending before.
    ///
    /// Returns the number of bytes consumed from the source (skipped lines
    /// plus the accepted one), or `Ok(0)` when the source is exhausted before
    /// another line is accepted. In the latter case the pending output is left
    /// untouched.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error reported by the underlying reader.
    pub fn buffer_more(&mut self) -> std::io::Result<usize> {
        let mut consumed = 0;
        // One allocation reused for every rejected line.
        let mut line = Vec::new();
        loop {
            line.clear();
            let n = self.inner.read_until(b'\n', &mut line)?;
            if n == 0 {
                return Ok(0);
            }
            consumed += n;
            if self.is_kept(&line) {
                // `VecDeque::from(Vec)` takes over the allocation instead of
                // copying byte by byte.
                self.buf = Some(VecDeque::from(line));
                return Ok(consumed);
            }
        }
    }

    /// Decides whether `line` passes the filter.
    fn is_kept(&self, line: &[u8]) -> bool {
        if line.starts_with(b"#") {
            return true;
        }
        // Everything before the first tab; the whole line (newline included)
        // when there is no tab at all.
        let first_field = line.split(|&b| b == b'\t').next();
        self.chrom
            .iter()
            .any(|name| first_field == Some(name.as_bytes()))
    }
}

impl<R: Read> Read for OnlyChromosomes<R> {
    /// Copies bytes of the current accepted line into `buf`, fetching the next
    /// accepted line when the current one is used up.
    ///
    /// At most the remainder of one line is returned per call. Returns `Ok(0)`
    /// when `buf` is empty or when the source has no further accepted lines,
    /// including the case where no line was ever accepted.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }
        loop {
            // `pending` may be absent (nothing accepted yet) or drained; in
            // both cases fall through and try to buffer another line.
            if let Some(pending) = self.buf.as_mut() {
                let n = pending.read(buf)?;
                if n > 0 {
                    return Ok(n);
                }
            }
            if self.buffer_more()? == 0 {
                return Ok(0);
            }
        }
    }
}
/// Cache file name for the GENCODE mouse M35 GFF3 test file.
///
/// `get_test_file_gff3_gencode_mouse_m35` passes this to `TestFile::from_url`
/// as `cache_name`, which joins it onto the system temporary directory.
pub const TEST_FILE_GFF3_GENCODE_MOUSE_M35_FILENAME: &str =
"gencode.vM35.chr_patch_hapl_scaff.basic.annotation.gff3";
/// URL the test file is downloaded from (a `.gz` resource; the transformer
/// constant wraps the response in a `GzDecoder`).
pub const TEST_FILE_GFF3_GENCODE_MOUSE_M35_URL: &str = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M35/gencode.vM35.chr_patch_hapl_scaff.basic.annotation.gff3.gz";
/// Process-wide slot holding the test file once it has been initialised by
/// `get_test_file_gff3_gencode_mouse_m35`.
static TEST_FILE_GFF3_GENCODE_MOUSE_M35: OnceLock<TestFile> = OnceLock::new();
/// Stream transformation applied to the downloaded GENCODE mouse M35 file:
/// gunzip the response, then keep only comment lines and lines whose first
/// column is one of the listed sequence ids.
pub const TEST_FILE_GFF3_GENCODE_MOUSE_M35_TRANSFORMER: &dyn Fn(Box<dyn Read>) -> Box<dyn Read> =
    &|compressed| {
        // NOTE(review): "ch11" and "ch17" lack the `r` of the other `chr*`
        // entries and are possibly typos for "chr11"/"chr17"; as written they
        // only match a first column that is literally "ch11"/"ch17". Changing
        // them alters the cached file contents (and presumably the expected
        // checksum), so confirm before editing.
        const KEPT_SEQIDS: &[&str] = &[
            "chr1",
            "chr2",
            "chr3",
            "chr5",
            "GL456221.1",
            "chrM",
            "ch11",
            "ch17",
        ];
        let decompressed = GzDecoder::new(compressed);
        Box::new(OnlyChromosomes::new(decompressed, KEPT_SEQIDS))
    };
/// Checksums accepted for output produced from the GENCODE mouse M35 test
/// file; `TestFile::execute_test` asserts that the value returned by the test
/// closure is one of these.
// NOTE(review): presumably a value in the format produced by `crc32_hex` — confirm against callers.
pub const TEST_FILE_GFF3_GENCODE_MOUSE_M35_EXPECT_OUTPUT_CKSUM: [&str; 1] = ["f6f3eb1d"];
/// Returns the shared GENCODE mouse M35 test file.
///
/// The first call builds it through `TestFile::from_url` (which may download
/// and filter the file, and panics on failure); later calls return the same
/// instance.
pub fn get_test_file_gff3_gencode_mouse_m35() -> &'static TestFile {
    /// One-time initializer for the shared slot.
    fn fetch() -> TestFile {
        TestFile::from_url(
            TEST_FILE_GFF3_GENCODE_MOUSE_M35_FILENAME,
            TEST_FILE_GFF3_GENCODE_MOUSE_M35_URL,
            &TEST_FILE_GFF3_GENCODE_MOUSE_M35_TRANSFORMER,
            &TEST_FILE_GFF3_GENCODE_MOUSE_M35_EXPECT_OUTPUT_CKSUM,
        )
    }

    TEST_FILE_GFF3_GENCODE_MOUSE_M35.get_or_init(fetch)
}
/// Computes a CRC-32 over everything `r` yields, using the `crc` crate's
/// `CRC_32_CKSUM` algorithm, and returns it as eight zero-padded lowercase
/// hexadecimal digits.
///
/// # Panics
///
/// Panics if reading from `r` fails with any error other than
/// [`std::io::ErrorKind::Interrupted`], which is retried as the `Read`
/// contract requires.
pub fn crc32_hex<R: Read>(mut r: R) -> String {
    use crc::{Crc, CRC_32_CKSUM};

    let crc = Crc::<u32>::new(&CRC_32_CKSUM);
    let mut digest = crc.digest();
    let mut buffer = [0; 1024];
    loop {
        match r.read(&mut buffer) {
            // End of input.
            Ok(0) => break,
            Ok(n) => digest.update(&buffer[..n]),
            // An interrupted read is non-fatal: try again instead of aborting.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => panic!("failed to read input while computing checksum: {e:?}"),
        }
    }
    format!("{:08x}", digest.finalize())
}
/// A file on disk used as test input, together with the output checksums a
/// test run over it is allowed to produce.
pub struct TestFile {
    /// Path of the file, as a string.
    pub name: String,
    /// Checksums accepted by [`TestFile::execute_test`]; when empty, no
    /// comparison is made.
    expect_output_cksum: Vec<&'static str>,
}

impl TestFile {
    /// Describes a file that is already at `name`; nothing on disk is touched.
    pub fn new_fs(name: &str, expect_output_cksum: &[&'static str]) -> Self {
        Self {
            name: name.to_string(),
            expect_output_cksum: expect_output_cksum.to_vec(),
        }
    }

    /// Returns a test file cached as `<temp dir>/<cache_name>`, downloading it
    /// from `url` first if no non-empty regular file exists at that path.
    ///
    /// The response body is passed through `pipe` (e.g. to decompress or
    /// filter it) and written to `<path>.download`, which is renamed to the
    /// final path only after the copy has completed, so an aborted download is
    /// never mistaken for a valid cache entry.
    ///
    /// # Panics
    ///
    /// Panics if the request fails, the server answers with an HTTP error
    /// status, or any file-system operation fails.
    pub fn from_url<RO: Read + ?Sized, F: Fn(Box<dyn Read>) -> Box<RO>>(
        cache_name: &str,
        url: &str,
        pipe: &F,
        expect_output_cksum: &[&'static str],
    ) -> Self {
        let name = std::env::temp_dir()
            .join(cache_name)
            .to_string_lossy()
            .to_string();
        let path = Path::new(&name);

        let is_cached = path
            .metadata()
            .map(|metadata| metadata.is_file() && metadata.len() > 0)
            .unwrap_or(false);
        if is_cached {
            return Self::new_fs(&name, expect_output_cksum);
        }
        // Something unusable (e.g. an empty file) occupies the cache path.
        if path.exists() {
            std::fs::remove_file(path).unwrap();
        }

        // Reject 4xx/5xx responses up front: otherwise an error page would be
        // piped through and could end up cached as the test file.
        let resp = reqwest::blocking::get(url)
            .unwrap()
            .error_for_status()
            .unwrap();

        let tmp_path = PathBuf::from(format!("{name}.download"));
        {
            let mut file = std::fs::File::create(&tmp_path).unwrap();
            std::io::copy(&mut pipe(Box::new(resp)), &mut file).unwrap();
            // `file` is closed at the end of this scope, before the rename.
        }
        std::fs::rename(&tmp_path, path).unwrap();
        Self::new_fs(&name, expect_output_cksum)
    }

    /// Runs `f` on this file's path and checks the checksum string it returns.
    ///
    /// `name` only labels the diagnostic output. When no expected checksums
    /// are configured the result is printed to stderr instead of compared.
    ///
    /// # Panics
    ///
    /// Panics if expected checksums are configured and the value returned by
    /// `f` is not among them.
    pub fn execute_test<F: FnOnce(&str) -> String>(&self, name: &str, f: F) {
        let output_cksum = f(&self.name);
        if self.expect_output_cksum.is_empty() {
            eprintln!("{}: not comparing cksum, got: {}", name, output_cksum);
        } else {
            assert!(
                self.expect_output_cksum.contains(&output_cksum.as_str()),
                "{}: cksum mismatch, got: {}",
                name,
                output_cksum
            );
        }
    }
}
/// Guard ensuring the logger is installed at most once per process.
static TEST_LOGGER_INIT: Once = Once::new();

/// Installs `simple_logger` at `Info` level the first time it is called;
/// subsequent calls do nothing.
///
/// # Panics
///
/// Panics if `simple_logger::init_with_level` reports an error.
pub fn ensure_logger_initialized() {
    /// Performs the actual one-time installation.
    fn install() {
        simple_logger::init_with_level(Level::Info).unwrap();
    }

    TEST_LOGGER_INIT.call_once(install);
}