use std::io::{Read, Seek};
use arrow::array::RecordBatchReader;
use arrow::datatypes::Schema as ArrowSchema;
use bigtools::BigBedRead;
use crate::bbi::model::base::BatchBuilder;
use crate::bbi::model::base::BedSchema;
use crate::bbi::model::base::Model;
use crate::bbi::scanner::batch_iterator::base::{BigBedBatchIterator, BigBedQueryBatchIterator};
use crate::Select;
/// Scans a BigBed file, exposing its metadata and producing Arrow record
/// batches via [`Scanner::scan`] / [`Scanner::scan_query`].
pub struct Scanner {
// Field/schema model describing which BED columns are materialized.
model: Model,
// Header-level metadata (chromosomes, zoom levels) read from the BBI file.
info: bigtools::BBIFileInfo,
}
impl Scanner {
    /// Builds a scanner by deriving a field [`Model`] from the BED schema
    /// and the requested field selection.
    ///
    /// # Errors
    /// Propagates any error raised by [`Model::new`].
    pub fn new(
        bed_schema: BedSchema,
        info: bigtools::BBIFileInfo,
        fields: Select<String>,
    ) -> crate::Result<Self> {
        Model::new(bed_schema, fields).map(|model| Self { model, info })
    }

    /// Builds a scanner from an already-constructed [`Model`].
    pub fn with_model(model: Model, info: bigtools::BBIFileInfo) -> Self {
        Self { model, info }
    }

    /// The field model backing this scanner.
    pub fn model(&self) -> &Model {
        &self.model
    }

    /// Names of the fields the model materializes.
    pub fn field_names(&self) -> Vec<String> {
        self.model.field_names()
    }

    /// Arrow schema of the record batches this scanner produces.
    pub fn schema(&self) -> &ArrowSchema {
        self.model.schema()
    }

    /// Creates a [`BatchBuilder`] for the full model, or for a projection of
    /// it when `columns` names a subset of fields.
    ///
    /// # Errors
    /// Fails if the projection references unknown columns, or if builder
    /// construction fails.
    fn build_batch_builder(
        &self,
        columns: Option<Vec<String>>,
        capacity: usize,
    ) -> crate::Result<BatchBuilder> {
        // Keep the projected model alive long enough to borrow it; otherwise
        // borrow the full model directly and avoid any extra work.
        let projection;
        let model = if let Some(cols) = columns {
            projection = self.model.project(&cols)?;
            &projection
        } else {
            &self.model
        };
        BatchBuilder::from_model(model, capacity)
    }
}
impl Scanner {
    /// Number of records per Arrow record batch when the caller does not
    /// supply a `batch_size`. Shared by [`Scanner::scan`] and
    /// [`Scanner::scan_query`] so the default cannot drift between them.
    const DEFAULT_BATCH_SIZE: usize = 1024;

    /// Header-level metadata read from the BBI file.
    pub fn info(&self) -> &bigtools::BBIFileInfo {
        &self.info
    }

    /// Names of all chromosomes recorded in the file header.
    pub fn chrom_names(&self) -> Vec<String> {
        self.info
            .chrom_info
            .iter()
            .map(|info| info.name.clone())
            .collect()
    }

    /// `(name, length)` pairs for all chromosomes in the file header.
    pub fn chrom_sizes(&self) -> Vec<(String, u32)> {
        self.info
            .chrom_info
            .iter()
            .map(|info| (info.name.clone(), info.length))
            .collect()
    }

    /// Reduction levels of the file's zoom headers, in header order.
    pub fn zoom_levels(&self) -> Vec<u32> {
        // Return the collected Vec directly (fixes clippy::let_and_return).
        self.info
            .zoom_headers
            .iter()
            .map(|header| header.reduction_level)
            .collect()
    }

    /// Streams the whole file as Arrow record batches.
    ///
    /// * `columns` — optional projection; `None` materializes every field.
    /// * `batch_size` — records per batch; defaults to
    ///   [`Self::DEFAULT_BATCH_SIZE`].
    /// * `limit` — optional cap on the total number of records read.
    ///
    /// # Errors
    /// Fails if the projection is invalid or the batch builder cannot be
    /// constructed.
    pub fn scan<R: Read + Seek>(
        &self,
        fmt_reader: BigBedRead<R>,
        columns: Option<Vec<String>>,
        batch_size: Option<usize>,
        limit: Option<usize>,
    ) -> crate::Result<impl RecordBatchReader> {
        let batch_size = batch_size.unwrap_or(Self::DEFAULT_BATCH_SIZE);
        let batch_builder = self.build_batch_builder(columns, batch_size)?;
        let batch_iter = BigBedBatchIterator::new(fmt_reader, batch_builder, batch_size, limit);
        Ok(batch_iter)
    }

    /// Streams records overlapping `region` as Arrow record batches.
    ///
    /// Parameters mirror [`Scanner::scan`]; the `Send + 'static` bound is
    /// required by [`BigBedQueryBatchIterator`].
    ///
    /// # Errors
    /// Fails if the projection is invalid or the batch builder cannot be
    /// constructed.
    pub fn scan_query<R: Read + Seek + Send + 'static>(
        &self,
        fmt_reader: BigBedRead<R>,
        region: noodles::core::Region,
        columns: Option<Vec<String>>,
        batch_size: Option<usize>,
        limit: Option<usize>,
    ) -> crate::Result<impl RecordBatchReader> {
        let batch_size = batch_size.unwrap_or(Self::DEFAULT_BATCH_SIZE);
        let batch_builder = self.build_batch_builder(columns, batch_size)?;
        let batch_iter =
            BigBedQueryBatchIterator::new(fmt_reader, region, batch_builder, batch_size, limit);
        Ok(batch_iter)
    }
}