use std::env;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::Path;
use std::sync::Arc;
use git_version::git_version;
use lz4_flex::frame::{FrameDecoder, FrameEncoder};
use tempfile::NamedTempFile;
use super::gridpdf::GridArray;
use super::metadata::MetaData;
/// Git description of the source tree, captured at compile time.
///
/// `git_version!` is invoked with the arguments `--always --dirty --long --tags`;
/// the `fallback` value `"unknown"` is what `compress` treats as "no version recorded".
const GIT_VERSION: &str = git_version!(
args = ["--always", "--dirty", "--long", "--tags"],
cargo_prefix = "cargo:",
fallback = "unknown"
);
/// Value of the `CARGO_PKG_VERSION` environment variable at compile time.
const CODE_VERSION: &str = env!("CARGO_PKG_VERSION");
/// A single [`GridArray`] paired with the metadata of the collection it was read from.
#[derive(Debug)]
pub struct GridArrayWithMetadata {
/// The deserialized grid.
pub grid: GridArray,
/// Collection metadata; the readers in this file hand out clones of one shared `Arc`.
pub metadata: Arc<MetaData>,
}
/// Namespace type for the functions that write and read whole grid collections
/// as LZ4-frame-compressed, bincode-serialized files.
pub struct GridArrayCollection;
impl GridArrayCollection {
/// Writes `grids` and their shared `metadata` to `path` as a single LZ4 frame.
///
/// The uncompressed stream is a sequence of bincode-encoded values:
///
/// 1. metadata length (`u64`) followed by the serialized metadata,
/// 2. number of grids (`u64`),
/// 3. byte length of the offset table (`u64`, eight bytes per grid),
/// 4. one `u64` offset per grid, counted from the first grid record,
/// 5. one record per grid: its length (`u64`) followed by the serialized grid.
///
/// Before writing, an empty or `"unknown"` `git_version` and an empty
/// `code_version` in a copy of the metadata are replaced by the values baked
/// into this build.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if serialization fails, or
/// if writing, finishing the frame, flushing, or syncing the file fails.
pub fn compress<P: AsRef<Path>>(
    grids: &[&GridArray],
    metadata: &MetaData,
    path: P,
) -> Result<(), Box<dyn std::error::Error>> {
    // Emits one bincode-encoded `u64` into the compressed stream.
    fn put_u64<W: Write>(sink: &mut W, value: u64) -> Result<(), Box<dyn std::error::Error>> {
        sink.write_all(&bincode::serialize(&value)?)?;
        Ok(())
    }

    let mut encoder = FrameEncoder::new(BufWriter::new(File::create(path)?));

    // Stamp version information on a private copy; the caller's metadata is untouched.
    let mut stamped = metadata.clone();
    if stamped.git_version.is_empty() || stamped.git_version == "unknown" {
        stamped.git_version = GIT_VERSION.to_string();
    }
    if stamped.code_version.is_empty() {
        stamped.code_version = CODE_VERSION.to_string();
    }
    let metadata_blob = bincode::serialize(&stamped)?;
    put_u64(&mut encoder, metadata_blob.len() as u64)?;
    encoder.write_all(&metadata_blob)?;

    put_u64(&mut encoder, grids.len() as u64)?;

    // Every grid is serialized up front because the offset table precedes the records.
    let grid_blobs = grids
        .iter()
        .map(|grid| bincode::serialize(grid))
        .collect::<Result<Vec<_>, _>>()?;

    put_u64(&mut encoder, (grid_blobs.len() * 8) as u64)?;
    let mut next_offset = 0u64;
    for blob in &grid_blobs {
        put_u64(&mut encoder, next_offset)?;
        // Each record is an 8-byte length prefix followed by the payload.
        next_offset += 8 + blob.len() as u64;
    }

    for blob in &grid_blobs {
        put_u64(&mut encoder, blob.len() as u64)?;
        encoder.write_all(blob)?;
    }

    let mut writer = encoder.finish()?;
    writer.flush()?;
    writer.get_mut().sync_all()?;
    Ok(())
}
/// Reads every grid of the collection stored at `path` into memory.
///
/// The file is decompressed in full, then the metadata, the grid count and
/// each length-prefixed grid record are decoded in order. The offset table is
/// skipped because the records are read sequentially. All returned grids share
/// one `Arc` of the collection metadata.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or decompressed, if any
/// section fails to deserialize, or if a length field points past the end of
/// the decompressed data (reported as an `UnexpectedEof` I/O error).
pub fn decompress<P: AsRef<Path>>(
    path: P,
) -> Result<Vec<GridArrayWithMetadata>, Box<dyn std::error::Error>> {
    // Splits `len` bytes off the front of `rest`. Lengths come straight from
    // the file, so they are validated here before anything is sized by them.
    fn take_bytes<'a>(rest: &mut &'a [u8], len: u64) -> std::io::Result<&'a [u8]> {
        let data: &'a [u8] = *rest;
        if len > data.len() as u64 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "section of {} bytes exceeds the {} bytes left in the file",
                    len,
                    data.len()
                ),
            ));
        }
        let (head, tail) = data.split_at(len as usize);
        *rest = tail;
        Ok(head)
    }

    let mut decompressed = Vec::new();
    FrameDecoder::new(BufReader::new(File::open(path)?)).read_to_end(&mut decompressed)?;
    // A byte slice is itself a reader; every read shrinks it from the front.
    let mut rest: &[u8] = &decompressed;

    let metadata_size: u64 = bincode::deserialize_from(&mut rest)?;
    let metadata: MetaData = bincode::deserialize(take_bytes(&mut rest, metadata_size)?)?;
    let shared_metadata = Arc::new(metadata);

    let count: u64 = bincode::deserialize_from(&mut rest)?;
    let _offset_table_size: u64 = bincode::deserialize_from(&mut rest)?;
    // One 8-byte offset per grid; a saturated product simply fails the bounds check.
    take_bytes(&mut rest, count.saturating_mul(8))?;

    // Each record needs at least its 8-byte length prefix, which caps the
    // number of grids that can actually be present.
    let mut grids = Vec::with_capacity(count.min(rest.len() as u64 / 8) as usize);
    for _ in 0..count {
        let size: u64 = bincode::deserialize_from(&mut rest)?;
        let grid: GridArray = bincode::deserialize(take_bytes(&mut rest, size)?)?;
        grids.push(GridArrayWithMetadata {
            grid,
            metadata: Arc::clone(&shared_metadata),
        });
    }
    Ok(grids)
}
/// Reads only the metadata header of the collection stored at `path`.
///
/// The header is streamed out of the decoder, so the grid payload that
/// follows it is never decompressed.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or decoded, if the stream
/// ends before the announced metadata length is reached (an `UnexpectedEof`
/// I/O error), or if the metadata bytes fail to deserialize.
pub fn extract_metadata<P: AsRef<Path>>(
    path: P,
) -> Result<MetaData, Box<dyn std::error::Error>> {
    let mut decoder = FrameDecoder::new(BufReader::new(File::open(path)?));
    let metadata_size: u64 = bincode::deserialize_from(&mut decoder)?;

    // Read through a bounded adapter instead of pre-allocating `metadata_size`
    // bytes: the buffer then only grows with data that really exists.
    let mut metadata_bytes = Vec::new();
    let read = Read::take(&mut decoder, metadata_size).read_to_end(&mut metadata_bytes)?;
    if (read as u64) < metadata_size {
        return Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            format!(
                "metadata section announces {} bytes but only {} are present",
                metadata_size, read
            ),
        )
        .into());
    }

    let metadata: MetaData = bincode::deserialize(&metadata_bytes)?;
    Ok(metadata)
}
}
/// Random-access reader over a grid collection that has been decompressed into memory.
pub struct GridArrayReader {
// Complete decompressed contents of the collection file.
data: Vec<u8>,
// Metadata shared by every grid returned from `load_grid`.
metadata: Arc<MetaData>,
// Byte offset of each grid record, relative to `data_start`.
offsets: Vec<u64>,
// Number of grids announced by the file header.
count: u64,
// Position in `data` immediately after the offset table.
data_start: u64,
}
impl GridArrayReader {
/// Opens a collection file, falling back to the older on-disk format when needed.
///
/// The current (v0.2.1+) layout is tried first. If that fails and the debug
/// rendering of the error contains `UnexpectedEof`, `Eof` or `Grid is not v2`,
/// the file is read again with the v0.2.0 loader.
///
/// # Errors
///
/// Returns the v2 error unchanged when it does not match one of the markers
/// above, or a combined message when both loaders fail.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
    const LEGACY_MARKERS: [&str; 3] = ["UnexpectedEof", "Eof", "Grid is not v2"];

    let path = path.as_ref();
    let v2_err = match Self::from_file_v2(path) {
        Ok(reader) => return Ok(reader),
        Err(err) => err,
    };

    let rendered = format!("{:?}", v2_err);
    if !LEGACY_MARKERS.iter().any(|marker| rendered.contains(*marker)) {
        return Err(v2_err);
    }

    Self::from_file_legacy(path).map_err(|legacy_err| {
        format!(
            "Failed to load PDF with both v0.2.1+ ({}) and v0.2.0 ({}) loaders",
            v2_err, legacy_err
        )
        .into()
    })
}
/// Loads a collection written in the current layout and indexes its grid records.
///
/// The file is decompressed in full. The metadata and the offset table are
/// decoded eagerly; the grid records stay serialized in `data` until
/// `load_grid` is called.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or decompressed, if a header
/// field fails to deserialize, or if the metadata length or the grid count
/// describe more bytes than the file contains (an `UnexpectedEof` I/O error,
/// which `from_file` treats as a signal to try the legacy loader).
fn from_file_v2(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
    // Splits `len` bytes off the front of `rest`. Lengths come straight from
    // the file, so they are validated here before anything is sized by them.
    fn take_bytes<'a>(rest: &mut &'a [u8], len: u64) -> std::io::Result<&'a [u8]> {
        let data: &'a [u8] = *rest;
        if len > data.len() as u64 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "section of {} bytes exceeds the {} bytes left in the file",
                    len,
                    data.len()
                ),
            ));
        }
        let (head, tail) = data.split_at(len as usize);
        *rest = tail;
        Ok(head)
    }

    let mut data = Vec::new();
    FrameDecoder::new(BufReader::new(File::open(path)?)).read_to_end(&mut data)?;

    // A byte slice is itself a reader; every read shrinks it from the front.
    let mut rest: &[u8] = &data;

    let metadata_size: u64 = bincode::deserialize_from(&mut rest)?;
    let metadata: MetaData = bincode::deserialize(take_bytes(&mut rest, metadata_size)?)?;

    let count: u64 = bincode::deserialize_from(&mut rest)?;
    let _offset_table_size: u64 = bincode::deserialize_from(&mut rest)?;

    // One 8-byte offset per grid. Carving the table out first guarantees that
    // `count` entries exist, so the allocation below is bounded by the file size.
    let mut table = take_bytes(&mut rest, count.saturating_mul(8))?;
    let mut offsets = Vec::with_capacity(count as usize);
    for _ in 0..count {
        let offset: u64 = bincode::deserialize_from(&mut table)?;
        offsets.push(offset);
    }

    // Whatever is left of `rest` is the grid records; they start where it begins.
    let data_start = (data.len() - rest.len()) as u64;
    Ok(Self {
        data,
        metadata: Arc::new(metadata),
        offsets,
        count,
        data_start,
    })
}
fn from_file_legacy(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
let legacy_reader = neopdf_legacy::writer::GridArrayReader::from_file(path)?;
let legacy_metadata = legacy_reader.metadata();
let metadata = MetaData::from((**legacy_metadata).clone());
let mut new_grids = Vec::new();
for i in 0..legacy_reader.len() {
let legacy_grid_with_meta = legacy_reader.load_grid(i)?;
let converted_grid = Self::convert_legacy_grid(legacy_grid_with_meta.grid);
new_grids.push(converted_grid);
}
let temp_file = NamedTempFile::new()?;
let temp_path = temp_file.path();
let grid_refs: Vec<&GridArray> = new_grids.iter().collect();
GridArrayCollection::compress(&grid_refs, &metadata, temp_path)?;
let result = Self::from_file_v2(temp_path)?;
Ok(result)
}
/// Converts a legacy grid into the current `GridArray` representation.
///
/// Fields that exist in both formats are carried over unchanged. The `xis` and
/// `deltas` axes, which the legacy subgrid does not provide here, are filled
/// with a single `0.0` knot and a `[0.0, 0.0]` range, and the grid values are
/// wrapped in `GridData::Grid6D`.
fn convert_legacy_grid(legacy_grid: neopdf_legacy::gridpdf::GridArray) -> GridArray {
    use crate::subgrid::{GridData, ParamRange, SubGrid};
    use ndarray::Array1;

    let mut subgrids: Vec<SubGrid> = Vec::new();
    for old in legacy_grid.subgrids {
        // Ranges are rebuilt field by field because the legacy range is a distinct type.
        let nucleons_range = ParamRange {
            min: old.nucleons_range.min,
            max: old.nucleons_range.max,
        };
        let alphas_range = ParamRange {
            min: old.alphas_range.min,
            max: old.alphas_range.max,
        };
        let kt_range = ParamRange {
            min: old.kt_range.min,
            max: old.kt_range.max,
        };
        let x_range = ParamRange {
            min: old.x_range.min,
            max: old.x_range.max,
        };
        let q2_range = ParamRange {
            min: old.q2_range.min,
            max: old.q2_range.max,
        };

        subgrids.push(SubGrid {
            xs: old.xs,
            q2s: old.q2s,
            kts: old.kts,
            xis: Array1::from_vec(vec![0.0]),
            deltas: Array1::from_vec(vec![0.0]),
            grid: GridData::Grid6D(old.grid),
            nucleons: old.nucleons,
            alphas: old.alphas,
            nucleons_range,
            alphas_range,
            xi_range: ParamRange::new(0.0, 0.0),
            delta_range: ParamRange::new(0.0, 0.0),
            kt_range,
            x_range,
            q2_range,
        });
    }

    GridArray {
        pids: legacy_grid.pids,
        subgrids,
    }
}
pub fn len(&self) -> usize {
self.count as usize
}
/// Returns `true` when the collection header announces zero grids.
pub fn is_empty(&self) -> bool {
    matches!(self.count, 0)
}
pub fn metadata(&self) -> &Arc<MetaData> {
&self.metadata
}
/// Deserializes the grid stored at position `index` of the collection.
///
/// The record is located through the offset table and decoded directly from
/// the in-memory data; the returned value shares the reader's metadata `Arc`.
///
/// # Errors
///
/// Returns an error if `index` is not smaller than the number of grids, if the
/// stored offset or record length points outside the decompressed data (an
/// `UnexpectedEof` I/O error), or if the record fails to deserialize.
pub fn load_grid(
    &self,
    index: usize,
) -> Result<GridArrayWithMetadata, Box<dyn std::error::Error>> {
    // Error used whenever an offset or a length points outside `self.data`.
    fn out_of_data() -> std::io::Error {
        std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "grid record extends past the end of the decompressed data",
        )
    }

    if index >= self.count as usize {
        return Err(format!(
            "Index {} out of bounds for collection of size {}",
            index, self.count
        )
        .into());
    }

    // Offsets and lengths come from the file, so they are validated before use
    // instead of being trusted for arithmetic, slicing, or allocation.
    let relative = *self.offsets.get(index).ok_or_else(out_of_data)?;
    let start = self
        .data_start
        .checked_add(relative)
        .filter(|&pos| pos <= self.data.len() as u64)
        .ok_or_else(out_of_data)?;

    // A byte slice is itself a reader; reading the length prefix advances it
    // to the start of the payload.
    let mut record: &[u8] = &self.data[start as usize..];
    let size: u64 = bincode::deserialize_from(&mut record)?;
    if size > record.len() as u64 {
        return Err(out_of_data().into());
    }

    let grid: GridArray = bincode::deserialize(&record[..size as usize])?;
    Ok(GridArrayWithMetadata {
        grid,
        metadata: Arc::clone(&self.metadata),
    })
}
}
/// Iterator that deserializes the grids of a collection one at a time.
///
/// The compressed input is decompressed into memory in full when the iterator
/// is created; only the deserialization of each grid is deferred to `next`.
pub struct LazyGridArrayIterator {
// Decompressed collection bytes, positioned at the next grid record to read.
cursor: std::io::Cursor<Vec<u8>>,
// Number of items `next` will still yield.
remaining: u64,
// Metadata shared by every yielded grid.
metadata: Arc<MetaData>,
// Scratch buffer reused across `next` calls for one grid's serialized bytes.
buffer: Vec<u8>,
}
impl LazyGridArrayIterator {
/// Builds an iterator over the grids contained in an LZ4-framed `reader`.
///
/// The stream is decompressed in full, the metadata is decoded, and the
/// offset table is skipped so that the cursor rests on the first grid record.
///
/// # Errors
///
/// Returns an error if decompression fails, if a header field fails to
/// deserialize, or if the metadata or offset-table length exceeds the bytes
/// that remain in the stream (an `UnexpectedEof` I/O error).
pub fn new<R: Read>(reader: R) -> Result<Self, Box<dyn std::error::Error>> {
    // Fails unless `needed` more bytes exist after the cursor position.
    // Lengths come straight from the stream, so they are checked before use.
    fn ensure_available(
        cursor: &std::io::Cursor<Vec<u8>>,
        needed: u64,
    ) -> std::io::Result<()> {
        let left = (cursor.get_ref().len() as u64).saturating_sub(cursor.position());
        if needed > left {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "section of {} bytes exceeds the {} bytes left in the stream",
                    needed, left
                ),
            ));
        }
        Ok(())
    }

    let mut decompressed = Vec::new();
    FrameDecoder::new(reader).read_to_end(&mut decompressed)?;
    let mut cursor = std::io::Cursor::new(decompressed);

    let metadata_size: u64 = bincode::deserialize_from(&mut cursor)?;
    ensure_available(&cursor, metadata_size)?;
    let metadata_start = cursor.position() as usize;
    let metadata_end = metadata_start + metadata_size as usize;
    // Decode straight from the cursor's buffer; no intermediate copy is needed.
    let metadata: MetaData =
        bincode::deserialize(&cursor.get_ref()[metadata_start..metadata_end])?;
    cursor.set_position(metadata_end as u64);

    let count: u64 = bincode::deserialize_from(&mut cursor)?;
    let offset_table_size: u64 = bincode::deserialize_from(&mut cursor)?;
    // Records are consumed sequentially, so the offset table is only skipped.
    ensure_available(&cursor, offset_table_size)?;
    let first_record = cursor.position() + offset_table_size;
    cursor.set_position(first_record);

    Ok(Self {
        cursor,
        remaining: count,
        metadata: Arc::new(metadata),
        buffer: Vec::new(),
    })
}
/// Opens a collection file, falling back to the older on-disk format when needed.
///
/// The current (v0.2.1+) layout is tried first. If that fails and the debug
/// rendering of the error contains one of the legacy markers below, the file
/// is read again with the v0.2.0 loader.
///
/// # Errors
///
/// Returns the v2 error unchanged when it matches none of the markers, or a
/// combined message when both loaders fail.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
    // `Eof` also covers `UnexpectedEof`. `Grid is not v2` is the marker
    // `GridArrayReader::from_file` accepts as well; both loaders deserialize
    // the metadata the same way, so both must fall back on it.
    const LEGACY_MARKERS: [&str; 3] = ["Eof", "unexpected end of file", "Grid is not v2"];

    let path = path.as_ref();
    let v2_err = match Self::from_file_v2(path) {
        Ok(iter) => return Ok(iter),
        Err(err) => err,
    };

    let rendered = format!("{:?}", v2_err);
    if !LEGACY_MARKERS.iter().any(|marker| rendered.contains(*marker)) {
        return Err(v2_err);
    }

    Self::from_file_legacy(path).map_err(|legacy_err| {
        format!(
            "Failed to load PDF with both v0.2.1+ ({}) and v0.2.0 ({}) loaders",
            v2_err, legacy_err
        )
        .into()
    })
}
/// Opens `path` and builds the iterator from a buffered reader over it.
fn from_file_v2(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
    Self::new(BufReader::new(File::open(path)?))
}
fn from_file_legacy(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
let legacy_reader = neopdf_legacy::writer::GridArrayReader::from_file(path)?;
let legacy_metadata = legacy_reader.metadata();
let metadata = MetaData::from((**legacy_metadata).clone());
let mut new_grids = Vec::new();
for i in 0..legacy_reader.len() {
let legacy_grid_with_meta = legacy_reader.load_grid(i)?;
let converted_grid = GridArrayReader::convert_legacy_grid(legacy_grid_with_meta.grid);
new_grids.push(converted_grid);
}
let temp_file = NamedTempFile::new()?;
let temp_path = temp_file.path();
let grid_refs: Vec<&GridArray> = new_grids.iter().collect();
GridArrayCollection::compress(&grid_refs, &metadata, temp_path)?;
let result = Self::from_file_v2(temp_path)?;
Ok(result)
}
pub fn metadata(&self) -> &Arc<MetaData> {
&self.metadata
}
}
/// Yields one deserialized grid per call, in the order the records were written.
///
/// Each item is a `Result`: a record that cannot be decoded is reported as an
/// `Err` item and still counts against the number of remaining items.
impl Iterator for LazyGridArrayIterator {
    type Item = Result<GridArrayWithMetadata, Box<dyn std::error::Error>>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining == 0 {
            return None;
        }
        let result = (|| -> Result<GridArrayWithMetadata, Box<dyn std::error::Error>> {
            let size: u64 = bincode::deserialize_from(&mut self.cursor)?;

            // The length comes from the stream: check it against what is left
            // before sizing the scratch buffer with it.
            let total = self.cursor.get_ref().len() as u64;
            let available = total.saturating_sub(self.cursor.position());
            if size > available {
                // Park the cursor at the end so later calls report end-of-data
                // rather than decoding from a misaligned position.
                self.cursor.set_position(total);
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    format!(
                        "grid record of {} bytes exceeds the {} bytes left in the stream",
                        size, available
                    ),
                )
                .into());
            }

            self.buffer.resize(size as usize, 0);
            self.cursor.read_exact(&mut self.buffer)?;
            let grid: GridArray = bincode::deserialize(&self.buffer)?;
            Ok(GridArrayWithMetadata {
                grid,
                metadata: Arc::clone(&self.metadata),
            })
        })();
        // Decrement even on failure so the iterator always terminates after
        // the announced number of items.
        self.remaining -= 1;
        Some(result)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.remaining as usize;
        (remaining, Some(remaining))
    }
}
// `size_hint` reports the remaining item count as both bounds, so the default `len` applies.
impl ExactSizeIterator for LazyGridArrayIterator {}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::Array1;
use tempfile::NamedTempFile;
use crate::metadata::{InterpolatorType, MetaDataV2, SetType};
// Round-trips a two-grid collection through `compress` and checks that
// `extract_metadata`, `decompress` and `LazyGridArrayIterator` all read it back.
#[test]
fn test_collection_with_metadata() {
// Version fields are left empty; `compress` fills them in on its own copy.
let metadata = MetaDataV2 {
set_desc: "Test PDF".into(),
set_index: 1,
num_members: 2,
x_min: 1e-5,
x_max: 1.0,
q_min: 1.0,
q_max: 1000.0,
flavors: vec![1, 2, 3],
format: "NeoPDF".into(),
alphas_q_values: vec![],
alphas_vals: vec![],
polarised: false,
set_type: SetType::SpaceLike,
interpolator_type: InterpolatorType::LogBicubic,
error_type: "replicas".into(),
hadron_pid: 2212,
git_version: String::new(),
code_version: String::new(),
flavor_scheme: String::new(),
order_qcd: 0,
alphas_order_qcd: 0,
m_w: 0.0,
m_z: 0.0,
m_up: 0.0,
m_down: 0.0,
m_strange: 0.0,
m_charm: 0.0,
m_bottom: 0.0,
m_top: 0.0,
alphas_type: String::new(),
number_flavors: 0,
xi_min: 0.0,
xi_max: 0.0,
delta_min: 0.0,
delta_max: 0.0,
};
// The same grid is written twice, so the collection holds two members.
let test_grid = test_grid();
let grids = vec![&test_grid, &test_grid];
let temp_file = NamedTempFile::new().unwrap();
let path = temp_file.path();
GridArrayCollection::compress(&grids, &metadata, path).unwrap();
// Header-only read.
let extracted = GridArrayCollection::extract_metadata(path).unwrap();
assert_eq!(metadata.set_desc, extracted.set_desc);
assert_eq!(metadata.set_index, extracted.set_index);
// Full read: every grid carries the collection metadata and the original pids.
let decompressed = GridArrayCollection::decompress(path).unwrap();
assert_eq!(decompressed.len(), 2);
for g in &decompressed {
assert_eq!(g.metadata.set_desc, "Test PDF");
assert_eq!(g.grid.pids, Array1::from(vec![1, 2, 3]));
}
// Iterator read: metadata is available up front and two items are yielded.
let g_iter = LazyGridArrayIterator::from_file(path).unwrap();
assert_eq!(g_iter.metadata().set_index, 1);
assert_eq!(g_iter.count(), 2);
}
// Minimal fixture: three parton ids and no subgrids.
fn test_grid() -> GridArray {
    let pids = Array1::from(vec![1, 2, 3]);
    GridArray {
        pids,
        subgrids: Vec::new(),
    }
}
}