use std::error::Error;
use std::fs::File;
use std::io::{Read, Seek, Write};
use std::{cell::RefCell, rc::Rc};
use crate::bbi::{BBI_MAX_ZOOM_LEVELS, BBI_RES_INCREMENT};
use crate::bigwig::{BigWigParameters, BigWigReader, BigWigWriter, OptionBigWig};
use crate::genome::Genome;
use crate::granges_row::GRangesRow;
use crate::netfile::NetFile;
use crate::track::{Track, TrackSequence};
use crate::track_generic::GenericTrack;
use crate::track_statistics::BinSummaryStatistics;
#[derive(Clone, Debug)]
pub struct BigWigTrack<R: Read + Seek> {
name: String,
bin_size: usize,
bin_overlap: usize,
bwr: RefCell<BigWigReader<R>>,
bin_sum_stat: BinSummaryStatistics,
genome: Genome,
init: f64,
}
impl<R: Read + Seek> BigWigTrack<R> {
fn from_reader(
reader: R,
name: String,
f: BinSummaryStatistics,
bin_size: usize,
bin_overlap: usize,
init: f64,
) -> Result<Self, Box<dyn Error>> {
let mut bwr = BigWigReader::new(reader)?;
let genome = bwr.genome().clone();
let bin_size = if bin_size == 0 {
bwr.get_bin_size()?
} else {
bin_size
};
Ok(Self {
name,
bin_size,
bin_overlap,
bwr: RefCell::new(bwr),
bin_sum_stat: f,
genome,
init,
})
}
}
impl BigWigTrack<NetFile> {
pub fn new(
filename: &str,
name: String,
f: BinSummaryStatistics,
bin_size: usize,
bin_overlap: usize,
init: f64,
) -> Result<Self, Box<dyn Error>> {
let reader = NetFile::open(filename)?;
Self::from_reader(reader, name, f, bin_size, bin_overlap, init)
}
}
pub struct LazyTrack<R: Read + Seek> {
track: BigWigTrack<R>,
}
impl<R: Read + Seek> LazyTrack<R> {
pub fn from_reader(
reader: R,
name: String,
f: BinSummaryStatistics,
bin_size: usize,
bin_overlap: usize,
init: f64,
) -> Result<Self, Box<dyn Error>> {
Ok(Self {
track: BigWigTrack::from_reader(reader, name, f, bin_size, bin_overlap, init)?,
})
}
pub fn filter_genome<F>(&mut self, f: F)
where
F: Fn(&str, usize) -> bool,
{
self.track.genome = self.track.genome.filter(f);
}
}
impl<R: Read + Seek> Track for LazyTrack<R> {
fn get_bin_size(&self) -> usize {
self.track.get_bin_size()
}
fn get_name(&self) -> String {
self.track.get_name()
}
fn get_seq_names(&self) -> Vec<String> {
self.track.get_seq_names()
}
fn get_genome(&self) -> &Genome {
self.track.get_genome()
}
fn get_sequence(&self, query: &str) -> Result<TrackSequence, Box<dyn Error>> {
self.track.get_sequence(query)
}
fn get_slice(&self, r: &GRangesRow) -> Result<Vec<f64>, Box<dyn Error>> {
self.track.get_slice(r)
}
}
pub struct LazyTrackFile {
track: LazyTrack<NetFile>,
}
impl LazyTrackFile {
pub fn open(
filename: &str,
name: String,
f: BinSummaryStatistics,
bin_size: usize,
bin_overlap: usize,
init: f64,
) -> Result<Self, Box<dyn Error>> {
let reader = NetFile::open(filename)?;
Ok(Self {
track: LazyTrack::from_reader(reader, name, f, bin_size, bin_overlap, init)?,
})
}
pub fn import_bigwig(
filename: &str,
name: &str,
f: BinSummaryStatistics,
bin_size: usize,
bin_overlap: usize,
init: f64,
) -> Result<Self, Box<dyn Error>> {
Self::open(filename, name.to_string(), f, bin_size, bin_overlap, init)
}
pub fn filter_genome<F>(&mut self, f: F)
where
F: Fn(&str, usize) -> bool,
{
self.track.filter_genome(f);
}
}
impl Track for LazyTrackFile {
fn get_bin_size(&self) -> usize {
self.track.get_bin_size()
}
fn get_name(&self) -> String {
self.track.get_name()
}
fn get_seq_names(&self) -> Vec<String> {
self.track.get_seq_names()
}
fn get_genome(&self) -> &Genome {
self.track.get_genome()
}
fn get_sequence(&self, query: &str) -> Result<TrackSequence, Box<dyn Error>> {
self.track.get_sequence(query)
}
fn get_slice(&self, r: &GRangesRow) -> Result<Vec<f64>, Box<dyn Error>> {
self.track.get_slice(r)
}
}
impl<R: Read + Seek> Track for BigWigTrack<R> {
fn get_bin_size(&self) -> usize {
self.bin_size
}
fn get_name(&self) -> String {
self.name.clone()
}
fn get_seq_names(&self) -> Vec<String> {
self.bwr.borrow().genome().seqnames.clone()
}
fn get_genome(&self) -> &Genome {
&self.genome
}
fn get_sequence(&self, query: &str) -> Result<TrackSequence, Box<dyn Error>> {
let (seq, bin_size) = self.bwr.borrow_mut().query_sequence(
query,
self.bin_sum_stat,
self.bin_size,
self.bin_overlap,
self.init,
)?;
let rc_seq = Rc::new(RefCell::new(seq));
Ok(TrackSequence::new(rc_seq, bin_size))
}
fn get_slice(&self, r: &GRangesRow) -> Result<Vec<f64>, Box<dyn Error>> {
let bin_from = r.range().from / self.bin_size;
let bin_to = r.range().to / self.bin_size;
let mut seq = vec![0.0; (bin_to - bin_from) as usize];
for item in
self.bwr
.borrow_mut()
.query(&r.seqname(), r.range().from, r.range().to, self.bin_size)
{
if let Err(err) = item {
return Err(Box::new(err));
}
let s = item.unwrap();
for i in (s.data.from..s.data.to).step_by(self.bin_size as usize) {
let j = (i - r.range().from as i32) / (self.bin_size as i32);
if (j as usize) < seq.len() {
seq[j as usize] = s.data.statistics.sum / s.data.statistics.valid;
}
}
}
Ok(seq)
}
}
impl<'a> GenericTrack<'a> {
fn bigwig_automatic_reduction_levels(&self, items_per_slot: usize) -> Vec<i32> {
let c = (BBI_RES_INCREMENT as usize) * self.track.get_bin_size();
let mut n = Vec::new();
let mut l = 0;
for &length in &self.track.get_genome().lengths {
if length / self.track.get_bin_size() > l {
l = length / self.track.get_bin_size();
}
}
let mut r = std::cmp::max(100, c);
while n.len() <= BBI_MAX_ZOOM_LEVELS {
if l / r > items_per_slot {
n.push(r as i32);
r *= c;
} else {
break;
}
}
n
}
pub fn write_bigwig<W: Write + Seek>(
&self,
writer: &mut W,
mut parameters_arg: Vec<OptionBigWig>,
) -> Result<(), Box<dyn Error>> {
let mut items_per_slot = BigWigParameters::default().items_per_slot;
let mut has_reduction_levels = false;
for p in ¶meters_arg {
if let OptionBigWig::ItemsPerSlot(x) = *p {
items_per_slot = x;
}
if let OptionBigWig::ReductionLevels(_) = *p {
has_reduction_levels = true;
}
}
if has_reduction_levels == false {
parameters_arg.push(OptionBigWig::ReductionLevels(
self.bigwig_automatic_reduction_levels(items_per_slot),
));
}
let mut bww = BigWigWriter::new(writer, self.track.get_genome().clone(), parameters_arg)?;
for name in self.track.get_seq_names() {
let sequence = self.track.get_sequence(&name)?;
bww.write(&name, &sequence.clone_as_vec(), self.track.get_bin_size())?;
}
bww.write_index()?;
for (i, &reduction_level) in bww.parameters().reduction_levels.clone().iter().enumerate() {
bww.start_zoom_data(i)?;
for name in self.track.get_seq_names() {
let sequence = self.track.get_sequence(&name)?;
bww.write_zoom(
&name,
&sequence.clone_as_vec(),
self.track.get_bin_size(),
reduction_level as usize,
i,
)?;
}
bww.write_index_zoom(i)?;
}
bww.close()?;
Ok(())
}
pub fn export_bigwig(
&self,
filename: &str,
params: Vec<OptionBigWig>,
) -> Result<(), Box<dyn Error>> {
let mut file = File::create(filename)?;
self.write_bigwig(&mut file, params)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::fs;
use crate::bigwig::BigWigFile;
use crate::genome::Genome;
use crate::track::Track;
use crate::track_bigwig::OptionBigWig;
use crate::track_generic::GenericTrack;
use crate::track_simple::SimpleTrack;
use crate::track_statistics::bin_summary_statistics_from_string;
use super::LazyTrackFile;
#[test]
fn test_track_bigwig_1() {
let filename = "tests/test_bigwig_tmp.bw";
let nan = f64::NAN;
let seq_1 = vec![0.0, 0.0, 0.0, nan, 4.5, 5.6, 0.0, 7.8, 8.9, 0.0];
let seq_2 = vec![
0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 0.0, 0.0, 8.9, 9.0, 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7,
7.8, 8.9, 9.0,
];
let seq_3 = vec![nan, nan, nan, nan, 4.5, 5.6, nan, nan, nan, nan];
let sequences = vec![seq_1.clone(), seq_2.clone(), seq_3.clone()];
let seqnames = vec!["test1", "test2", "test3"]
.into_iter()
.map(|x| x.to_string())
.collect();
let lengths = vec![100, 200, 100];
let genome = Genome::new(seqnames, lengths);
let track = SimpleTrack::new("track_name".to_string(), sequences, genome, 10).unwrap();
let params = vec![OptionBigWig::ReductionLevels(vec![20])];
if let Err(e) = GenericTrack::wrap(&track).export_bigwig(filename, params) {
println!("{}", e);
}
let result = BigWigFile::new_reader(filename);
assert!(result.is_ok());
if let Ok(mut bw) = result {
assert_eq!(bw.query("test1", 0, 100, 10).count(), 9);
assert_eq!(bw.query("test2", 0, 200, 10).count(), 20);
assert_eq!(bw.query("test3", 0, 100, 10).count(), 2);
for item in bw.query("test1", 0, 100, 10) {
let result = item.unwrap();
let i = result.data.from as usize / 10;
assert_eq!(result.data_type, 3);
assert_eq!(result.data.from, (i as i32) * 10);
assert_eq!(result.data.to, (i as i32) * 10 + 10);
assert!((result.data.statistics.sum - seq_1[i]).abs() < 1e-4);
}
for (i, item) in bw.query("test2", 0, 200, 10).enumerate() {
let result = item.unwrap();
assert_eq!(result.data_type, 3);
assert_eq!(result.data.from, (i as i32) * 10);
assert_eq!(result.data.to, (i as i32) * 10 + 10);
assert!((result.data.statistics.sum - seq_2[i]).abs() < 1e-4);
}
for item in bw.query("test3", 0, 100, 10) {
let result = item.unwrap();
let i = result.data.from as usize / 10;
assert_eq!(result.data_type, 2);
assert!((result.data.statistics.sum - seq_3[i]).abs() < 1e-4);
}
for item in bw.query("test1", 0, 100, 20) {
let result = item.unwrap();
let i = result.data.from as usize / 10;
let mut val = 0.0;
let mut sum = 0.0;
if !seq_1[i].is_nan() {
sum += seq_1[i + 0];
val += 1.0;
}
if !seq_1[i + 1].is_nan() {
sum += seq_1[i + 1];
val += 1.0;
}
assert_eq!(result.data_type, 1);
assert!((result.data.statistics.valid - val).abs() < 1e-4);
assert!((result.data.statistics.sum - sum).abs() < 1e-4);
}
for item in bw.query("test2", 0, 200, 20) {
let result = item.unwrap();
let i = result.data.from as usize / 10;
assert_eq!(result.data_type, 1);
assert!((result.data.statistics.valid - 2.0).abs() < 1e-4);
assert!((result.data.statistics.sum - (seq_2[i] + seq_2[i + 1])).abs() < 1e-4);
}
for item in bw.query("test3", 0, 100, 20) {
let result = item.unwrap();
let i = result.data.from as usize / 10;
assert_eq!(result.data_type, 1);
assert!((result.data.statistics.valid - 2.0).abs() < 1e-4);
assert!((result.data.statistics.sum - (seq_3[i] + seq_3[i + 1])).abs() < 1e-4);
}
}
assert!(fs::remove_file(filename).is_ok());
}
#[test]
fn lazy_track_file_matches_eager_import() {
let summary = bin_summary_statistics_from_string("mean").unwrap();
let mut eager = SimpleTrack::empty("eager".to_string());
eager
.import_bigwig("tests/test_bigwig_2.bw", "eager", summary, 100, 0, f64::NAN)
.unwrap();
let lazy = LazyTrackFile::import_bigwig(
"tests/test_bigwig_2.bw",
"lazy",
summary,
100,
0,
f64::NAN,
)
.unwrap();
assert_eq!(lazy.get_bin_size(), eager.get_bin_size());
assert_eq!(lazy.get_seq_names(), eager.get_seq_names());
for seqname in lazy.get_seq_names() {
let lazy_values = lazy.get_sequence(&seqname).unwrap().clone_as_vec();
let eager_values = eager.get_sequence(&seqname).unwrap().clone_as_vec();
assert_eq!(lazy_values.len(), eager_values.len());
for (lazy_value, eager_value) in lazy_values.iter().zip(eager_values.iter()) {
if lazy_value.is_nan() && eager_value.is_nan() {
continue;
}
assert!((lazy_value - eager_value).abs() < 1e-10);
}
}
}
}