use crate::deletion_vector::core::DeletionVector;
use crate::io::{FileIO, FileRead};
use crate::spec::DataFileMeta;
use crate::Result;
use std::collections::HashMap;
use std::sync::Arc;
/// Lookup table of deletion vectors that were loaded up front for a set of
/// data files.
///
/// Instances are built by `DeletionVectorFactory::new` and queried with
/// `get_deletion_vector`.
pub struct DeletionVectorFactory {
// Keyed by the data file's `file_name`; each vector is wrapped in an `Arc`
// so it can be shared without copying.
deletion_vectors: HashMap<String, Arc<DeletionVector>>,
}
impl DeletionVectorFactory {
    /// Builds a factory by reading the deletion vector of every data file that
    /// has an associated deletion file.
    ///
    /// `data_files` and `data_deletion_files` are matched positionally: the
    /// i-th deletion-file slot belongs to the i-th data file. Slots that are
    /// `None` are skipped. When `data_deletion_files` itself is `None`, the
    /// resulting factory is empty and no I/O is performed.
    ///
    /// NOTE(review): the two slices are combined with `zip`, so if their
    /// lengths differ the surplus entries of the longer one are silently
    /// ignored — confirm callers always pass slices of equal length.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while reading a deletion file;
    /// vectors loaded before the failure are discarded.
    pub async fn new(
        file_io: &FileIO,
        data_files: &[DataFileMeta],
        data_deletion_files: Option<&[Option<crate::DeletionFile>]>,
    ) -> Result<Self> {
        let mut deletion_vectors = HashMap::new();

        if let Some(deletion_slots) = data_deletion_files {
            // Keep only the (data file, deletion file) pairs that actually
            // carry a deletion file.
            let present = data_files
                .iter()
                .zip(deletion_slots)
                .filter_map(|(meta, slot)| slot.as_ref().map(|deletion_file| (meta, deletion_file)));

            // Files are read one after another, in input order.
            for (meta, deletion_file) in present {
                let vector = Self::read(file_io, deletion_file).await?;
                deletion_vectors.insert(meta.file_name.clone(), Arc::new(vector));
            }
        }

        Ok(Self { deletion_vectors })
    }

    /// Returns the deletion vector registered under `data_file_name`, or
    /// `None` if no vector was loaded for that name.
    pub fn get_deletion_vector(&self, data_file_name: &str) -> Option<&Arc<DeletionVector>> {
        self.deletion_vectors.get(data_file_name)
    }

    /// Reads the byte range described by `df` and decodes it into a
    /// [`DeletionVector`].
    ///
    /// The range starts at `df.offset()` and spans `df.length() + 8` bytes;
    /// the end bound saturates at `u64::MAX` instead of overflowing.
    ///
    /// NOTE(review): the 8 extra bytes presumably cover framing around the
    /// payload (e.g. a length prefix and a trailing checksum) — confirm
    /// against `DeletionVector::read_from_bytes`.
    /// NOTE(review): `offset()` and `length()` are converted with `as u64`;
    /// if they are signed, a negative value would wrap to a huge number —
    /// verify the upstream types.
    async fn read(file_io: &FileIO, df: &crate::DeletionFile) -> Result<DeletionVector> {
        let input = file_io.new_input(df.path())?;
        let reader = input.reader().await?;

        let start = df.offset() as u64;
        let payload_len = df.length() as u64;
        let end = start.saturating_add(payload_len).saturating_add(8);

        let bytes = reader.read(start..end).await?;
        DeletionVector::read_from_bytes(&bytes, Some(payload_len))
    }
}