mod tests_computed_data_set_metadata;
mod tests_file_writer;
use std::collections::BTreeSet;
use std::convert::TryFrom;
use std::io::{Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::rc::Rc;
use crate::data_set::DimensionSize;
use crate::data_vector::DataVector;
use crate::error::WriteError;
use crate::io::Offset;
use crate::{Attribute, DataSet, DataType, Dimension, Variable, Version};
use crate::io::{compute_padding_size, ABSENT_TAG, ATTRIBUTE_TAG, DIMENSION_TAG, VARIABLE_TAG};
use crate::{NC_FILL_F32, NC_FILL_F64, NC_FILL_I16, NC_FILL_I32, NC_FILL_I8, NC_FILL_U8};
// Generates `fn $func_name<T: Write>(out_stream, slice) -> Result<usize, std::io::Error>`.
//
// The generated function writes every value of `slice` in big-endian order, then
// appends the number of padding bytes reported by `compute_padding_size`, built by
// cycling over the big-endian bytes of `$nc_fill_value`. It returns the total number
// of bytes written (data + padding).
macro_rules! impl_write_typed_chunk {
    ($func_name:ident, $prim_type:ty, $nc_fill_value:ident) => {
        /// Writes `slice` as big-endian values followed by fill-value padding and
        /// returns the number of bytes written.
        fn $func_name<T: Write>(
            out_stream: &mut T,
            slice: &[$prim_type],
        ) -> Result<usize, std::io::Error> {
            const SIZE_OF: usize = std::mem::size_of::<$prim_type>();
            // Values are serialized into a bounded scratch buffer so that the
            // stream receives one `write_all` per batch instead of one per value
            // (which is one syscall per value on an unbuffered file).
            const BATCH_LEN: usize = 4096;
            let mut buffer: Vec<u8> = Vec::with_capacity(slice.len().min(BATCH_LEN) * SIZE_OF);
            for batch in slice.chunks(BATCH_LEN) {
                buffer.clear();
                for value in batch {
                    buffer.extend_from_slice(&value.to_be_bytes());
                }
                out_stream.write_all(&buffer)?;
            }
            let mut num_bytes: usize = slice.len() * SIZE_OF;
            let padding_size: usize = compute_padding_size(num_bytes);
            if padding_size > 0 {
                let nc_fill_bytes: [u8; SIZE_OF] = $nc_fill_value.to_be_bytes();
                let padding_bytes: Vec<u8> = nc_fill_bytes
                    .iter()
                    .copied()
                    .cycle()
                    .take(padding_size)
                    .collect();
                out_stream.write_all(&padding_bytes)?;
                num_bytes += padding_size;
            }
            Ok(num_bytes)
        }
    };
}
// Generates a public `FileWriter` method that writes all the data of one variable.
//
// `$write_typed_chunk` is the chunk writer used for the element type `$prim_type`,
// `$data_type` is the `DataType` the variable must have. `$data_vector` is accepted
// for call-site compatibility but is not used by the generated code.
macro_rules! impl_write_typed_var {
    ($func_name:ident, $write_typed_chunk: path, $prim_type:ty, $data_type:path, $data_vector:path) => {
        /// Writes the whole content of the variable `var_name`.
        ///
        /// # Errors
        ///
        /// - `WriteError::HeaderNotDefined` if no header definition has been set.
        /// - `WriteError::VariableNotDefined` if the variable is not found.
        /// - `WriteError::VariableMismatchDataType` if the variable has another data type.
        /// - `WriteError::VariableMismatchDataLength` if `data.len()` differs from `var.len()`.
        /// - any error raised while seeking or writing the output.
        pub fn $func_name(
            &mut self,
            var_name: &str,
            data: &[$prim_type],
        ) -> Result<(), WriteError> {
            let header_def: &HeaderDefinition = self
                .header_def
                .as_ref()
                .ok_or(WriteError::HeaderNotDefined)?;
            let var: &Variable = header_def
                .data_set
                .find_var_from_name(var_name)
                .map_err(|_err| WriteError::VariableNotDefined(var_name.to_owned()))?
                .1;
            if var.data_type != $data_type {
                return Err(WriteError::VariableMismatchDataType {
                    var_name: var_name.to_owned(),
                    req: var.data_type(),
                    get: $data_type,
                });
            }
            if var.len() != data.len() {
                return Err(WriteError::VariableMismatchDataLength {
                    var_name: var_name.to_owned(),
                    req: var.len(),
                    get: data.len(),
                });
            }
            let var_metadata: &ComputedVariableMetadata = header_def.get_var_metadata(var)?;
            let begin_offset: u64 = i64::from(var_metadata.begin_offset.clone()) as u64;
            match header_def.data_set.record_size() {
                None => {
                    // No record size: the data is written as one contiguous chunk.
                    self.output_file.seek(SeekFrom::Start(begin_offset))?;
                    let _chunk_size: usize = $write_typed_chunk(&mut self.output_file, data)?;
                }
                Some(record_size) => {
                    // One chunk per record, consecutive chunks being `record_size` bytes apart.
                    let num_chunks: usize = var.num_chunks();
                    let chunk_len: usize = var.chunk_len();
                    for i in 0..num_chunks {
                        let start: usize = i * chunk_len;
                        let end: usize = (i + 1) * chunk_len;
                        let chunk_slice: &[$prim_type] = &data[start..end];
                        let position: u64 = begin_offset + (i as u64) * (record_size as u64);
                        self.output_file.seek(SeekFrom::Start(position))?;
                        let _chunk_size: usize =
                            $write_typed_chunk(&mut self.output_file, chunk_slice)?;
                    }
                }
            }
            // Merge into the existing bookkeeping entry of the variable instead of
            // pushing a second one: `close` only looks at the first entry it finds,
            // so a stale partial entry would make it overwrite the records written
            // here with fill values.
            let num_records: usize = header_def.data_set.num_records().unwrap_or(1);
            let all_records: Vec<usize> = (0..num_records).collect();
            self.update_written_records(var, &all_records)?;
            Ok(())
        }
    };
}
// Generates a public `FileWriter` method that writes one record (chunk) of a variable.
//
// `$write_typed_chunk` is the chunk writer used for the element type `$prim_type`,
// `$data_type` is the `DataType` the variable must have.
macro_rules! impl_write_typed_record {
    ($func_name:ident, $write_typed_chunk: path, $prim_type:ty, $data_type: path) => {
        /// Writes the record `record_index` of the variable `var_name`.
        ///
        /// # Errors
        ///
        /// - `WriteError::HeaderNotDefined` if no header definition has been set.
        /// - `WriteError::VariableNotDefined` if the variable is not found.
        /// - `WriteError::VariableMismatchDataType` if the variable has another data type.
        /// - `WriteError::RecordIndexExceeded` if `record_index` is not a chunk of the variable.
        /// - `WriteError::RecordMismatchDataLength` if `record.len()` differs from `var.chunk_len()`.
        /// - any error raised while seeking or writing the output.
        pub fn $func_name(
            &mut self,
            var_name: &str,
            record_index: usize,
            record: &[$prim_type],
        ) -> Result<(), WriteError> {
            let header_def: &HeaderDefinition = self
                .header_def
                .as_ref()
                .ok_or(WriteError::HeaderNotDefined)?;
            let var: &Variable = header_def
                .data_set
                .find_var_from_name(var_name)
                .map_err(|_err| WriteError::VariableNotDefined(var_name.to_owned()))?
                .1;
            if var.data_type != $data_type {
                return Err(WriteError::VariableMismatchDataType {
                    var_name: var_name.to_owned(),
                    req: var.data_type(),
                    get: $data_type,
                });
            }
            // The index is bounded by the record count of the data set AND by the
            // number of chunks the variable itself owns (the bound used when the
            // whole variable is written), so that a variable with fewer chunks
            // than the data set has records cannot be written outside its storage.
            let num_records: usize = header_def
                .data_set
                .num_records()
                .unwrap_or(1)
                .min(var.num_chunks());
            if record_index >= num_records {
                return Err(WriteError::RecordIndexExceeded {
                    index: record_index,
                    num_records,
                });
            }
            if record.len() != var.chunk_len() {
                return Err(WriteError::RecordMismatchDataLength {
                    var_name: var.name.clone(),
                    req: var.chunk_len(),
                    get: record.len(),
                });
            }
            let var_metadata: &ComputedVariableMetadata = header_def.get_var_metadata(var)?;
            let record_size: usize = header_def.data_set.record_size().unwrap_or(0);
            // The product is computed in `u64` so it cannot wrap on 32-bit targets.
            let begin_offset: u64 = i64::from(var_metadata.begin_offset.clone()) as u64
                + (record_size as u64) * (record_index as u64);
            self.output_file.seek(SeekFrom::Start(begin_offset))?;
            let _chunk_size: usize = $write_typed_chunk(&mut self.output_file, record)?;
            self.update_written_records(var, &[record_index][..])?;
            Ok(())
        }
    };
}
// Generates `fn $func_name<T: Write>(out_stream, num_values) -> Result<usize, std::io::Error>`.
//
// The generated function writes `num_values` times the big-endian bytes of
// `$nc_fill_value`, then the number of padding bytes reported by
// `compute_padding_size` (also built from the fill value bytes). It returns the
// total number of bytes written.
macro_rules! impl_write_typed_chunk_nc_fill {
    ($func_name: ident, $prim_type:ty, $nc_fill_value:path) => {
        /// Writes a chunk of `num_values` fill values followed by the padding and
        /// returns the number of bytes written.
        fn $func_name<T: Write>(
            out_stream: &mut T,
            num_values: usize,
        ) -> Result<usize, std::io::Error> {
            const SIZE_OF: usize = std::mem::size_of::<$prim_type>();
            // The fill pattern is materialized once for a bounded number of values
            // and reused, so the stream gets one `write_all` per batch instead of
            // one per value.
            const BATCH_LEN: usize = 4096;
            let fill_bytes: [u8; SIZE_OF] = $nc_fill_value.to_be_bytes();
            let batch: Vec<u8> = fill_bytes
                .iter()
                .copied()
                .cycle()
                .take(num_values.min(BATCH_LEN) * SIZE_OF)
                .collect();
            let mut remaining: usize = num_values;
            while remaining > 0 {
                let batch_len: usize = remaining.min(BATCH_LEN);
                out_stream.write_all(&batch[..batch_len * SIZE_OF])?;
                remaining -= batch_len;
            }
            let mut num_bytes: usize = num_values * SIZE_OF;
            let padding_size: usize = compute_padding_size(num_bytes);
            if padding_size > 0 {
                let padding_bytes: Vec<u8> = fill_bytes
                    .iter()
                    .copied()
                    .cycle()
                    .take(padding_size)
                    .collect();
                out_stream.write_all(&padding_bytes)?;
                num_bytes += padding_size;
            }
            Ok(num_bytes)
        }
    };
}
pub trait SeekWrite: Seek + Write {}
impl<T: Seek + Write> SeekWrite for T {}
impl core::fmt::Debug for dyn SeekWrite {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(f, "{:p}", self)
}
}
/// Writer producing a file from a `DataSet` definition.
///
/// The header definition is set once with `set_def`, the data are then written
/// through the `write_var_*` / `write_record_*` methods and `close` fills the
/// records which have not been written.
#[derive(Debug)]
pub struct FileWriter<'a> {
/// Path returned by `file_path`.
output_file_path: PathBuf,
/// Seekable output receiving the header and the data.
output_file: Box<dyn SeekWrite>,
/// Header definition, `None` until `set_def` succeeds in building it.
header_def: Option<HeaderDefinition<'a>>,
/// For each variable, the indices of the records already written.
written_records: Vec<(&'a Variable, BTreeSet<usize>)>,
}
impl<'a> FileWriter<'a> {
/// Creates a writer on top of an already opened seekable output.
///
/// `file_name` is only recorded as the path reported by `file_path`; no file is
/// opened by this function.
pub fn open_seek_write(
    file_name: &str,
    output: Box<dyn SeekWrite>,
) -> Result<Self, WriteError> {
    let writer = FileWriter {
        output_file_path: PathBuf::from(file_name),
        output_file: output,
        header_def: None,
        written_records: Vec::new(),
    };
    Ok(writer)
}
/// Opens `output_file_path` for writing, creating the file if it is missing and
/// truncating it otherwise.
///
/// # Errors
///
/// Returns the I/O error converted into a `WriteError` if the file cannot be opened.
pub fn open<P: std::convert::AsRef<Path>>(
    output_file_path: P,
) -> Result<FileWriter<'a>, WriteError> {
    let output_file_path: PathBuf = output_file_path.as_ref().to_path_buf();
    // Options left unset (`read`, `create_new`, `append`) keep their default: `false`.
    let mut options = std::fs::OpenOptions::new();
    options.write(true).create(true).truncate(true);
    let output_file: std::fs::File = options.open(&output_file_path)?;
    Ok(FileWriter {
        output_file_path,
        output_file: Box::new(output_file),
        header_def: None,
        written_records: Vec::new(),
    })
}
/// Creates a new file at `output_file_path` and opens it for writing.
///
/// The file is opened with `create_new(true)`, so opening fails if the file
/// already exists.
///
/// # Errors
///
/// Returns the I/O error converted into a `WriteError` if the file cannot be created.
pub fn create_new<P: std::convert::AsRef<Path>>(
    output_file_path: P,
) -> Result<FileWriter<'a>, WriteError> {
    let output_file_path: PathBuf = output_file_path.as_ref().to_path_buf();
    let mut options = std::fs::OpenOptions::new();
    options.read(false).write(true).create_new(true);
    let output_file: std::fs::File = options.open(&output_file_path)?;
    Ok(FileWriter {
        output_file_path,
        output_file: Box::new(output_file),
        header_def: None,
        written_records: Vec::new(),
    })
}
/// Returns the path recorded when the writer was created.
pub fn file_path(&self) -> &Path {
    self.output_file_path.as_path()
}
/// Sets the header definition of the file and writes the header.
///
/// # Errors
///
/// - `WriteError::HeaderAlreadyDefined` if a definition has already been set.
/// - any error returned while computing the header definition or writing the header.
pub fn set_def(
    &mut self,
    data_set: &'a DataSet,
    version: Version,
    header_min_size: usize,
) -> Result<(), WriteError> {
    if self.header_def.is_some() {
        return Err(WriteError::HeaderAlreadyDefined);
    }
    let header_def = HeaderDefinition::new(data_set, version, header_min_size)?;
    self.header_def = Some(header_def);
    // The number of written bytes is not needed here.
    self.write_header()?;
    Ok(())
}
/// Returns `true` once a header definition has been set.
pub fn header_is_defined(&self) -> bool {
    self.header_def.as_ref().is_some()
}
/// Returns the data set of the header definition, or `None` if it is not set.
pub fn data_set(&self) -> Option<&'a DataSet> {
    let header_def = self.header_def.as_ref()?;
    Some(header_def.data_set)
}
pub fn version(&self) -> Option<Version> {
self.header_def
.as_ref()
.map(|header_def| header_def.version.clone())
}
/// Returns the minimum header size given to `set_def`, or `None` if no header
/// definition is set.
pub fn header_min_size(&self) -> Option<usize> {
    let header_def = self.header_def.as_ref()?;
    Some(header_def.header_min_size)
}
pub fn close(mut self) -> Result<(), WriteError> {
let header_def: &HeaderDefinition = match self.header_def {
None => return Ok(()),
Some(ref header_def) => header_def,
};
let num_records: usize = header_def.data_set.num_records().unwrap_or(1);
let all_records: BTreeSet<usize> = (0..num_records).collect();
let not_written_records: Vec<(&'a Variable, Vec<usize>)> = {
let num_vars = header_def.data_set.vars.len();
let mut not_written_records: Vec<(&'a Variable, Vec<usize>)> =
Vec::with_capacity(num_vars);
for var in header_def.data_set.vars.iter() {
let written_records: Option<&BTreeSet<usize>> = self
.written_records
.iter()
.find(
|(var_2, _written_records): &&(&'a Variable, BTreeSet<usize>)| {
var == *var_2
},
)
.map(|(_var_2, written_records): &(&'a Variable, BTreeSet<_>)| written_records);
let not_written_record: Vec<usize> = match written_records {
None => all_records.clone().into_iter().collect(),
Some(written_records) => {
all_records.difference(written_records).cloned().collect()
}
};
not_written_records.push((var, not_written_record));
}
not_written_records
};
let record_size: usize = header_def.data_set.record_size().unwrap_or(0);
for (var, not_written_records) in not_written_records.into_iter() {
let chunk_len: usize = var.chunk_len();
let var_metadata: &ComputedVariableMetadata = header_def.get_var_metadata(var)?;
let begin_offset: usize = i64::from(var_metadata.begin_offset.clone()) as usize;
for i in not_written_records.into_iter() {
let position: usize = begin_offset + (i * record_size);
self.output_file.seek(SeekFrom::Start(position as u64))?;
let _num_bytes: usize = match var.data_type() {
DataType::I8 => {
FileWriter::write_chunk_nc_fill_i8(&mut self.output_file, chunk_len)
}
DataType::U8 => {
FileWriter::write_chunk_nc_fill_u8(&mut self.output_file, chunk_len)
}
DataType::I16 => {
FileWriter::write_chunk_nc_fill_i16(&mut self.output_file, chunk_len)
}
DataType::I32 => {
FileWriter::write_chunk_nc_fill_i32(&mut self.output_file, chunk_len)
}
DataType::F32 => {
FileWriter::write_chunk_nc_fill_f32(&mut self.output_file, chunk_len)
}
DataType::F64 => {
FileWriter::write_chunk_nc_fill_f64(&mut self.output_file, chunk_len)
}
}?;
}
}
Ok(())
}
// Private chunk writers: write a slice of values in big-endian order, followed
// by padding bytes built from the fill value of the type.
impl_write_typed_chunk!(write_chunk_i8, i8, NC_FILL_I8);
impl_write_typed_chunk!(write_chunk_u8, u8, NC_FILL_U8);
impl_write_typed_chunk!(write_chunk_i16, i16, NC_FILL_I16);
impl_write_typed_chunk!(write_chunk_i32, i32, NC_FILL_I32);
impl_write_typed_chunk!(write_chunk_f32, f32, NC_FILL_F32);
impl_write_typed_chunk!(write_chunk_f64, f64, NC_FILL_F64);
// Public `write_var_*` methods: write all the data of a variable, one method
// per data type.
impl_write_typed_var!(
write_var_i8,
FileWriter::write_chunk_i8,
i8,
DataType::I8,
DataVector::I8
);
impl_write_typed_var!(
write_var_u8,
FileWriter::write_chunk_u8,
u8,
DataType::U8,
DataVector::U8
);
impl_write_typed_var!(
write_var_i16,
FileWriter::write_chunk_i16,
i16,
DataType::I16,
DataVector::I16
);
impl_write_typed_var!(
write_var_i32,
FileWriter::write_chunk_i32,
i32,
DataType::I32,
DataVector::I32
);
impl_write_typed_var!(
write_var_f32,
FileWriter::write_chunk_f32,
f32,
DataType::F32,
DataVector::F32
);
impl_write_typed_var!(
write_var_f64,
FileWriter::write_chunk_f64,
f64,
DataType::F64,
DataVector::F64
);
// Public `write_record_*` methods: write a single record of a variable, one
// method per data type.
impl_write_typed_record!(
write_record_i8,
FileWriter::write_chunk_i8,
i8,
DataType::I8
);
impl_write_typed_record!(
write_record_u8,
FileWriter::write_chunk_u8,
u8,
DataType::U8
);
impl_write_typed_record!(
write_record_i16,
FileWriter::write_chunk_i16,
i16,
DataType::I16
);
impl_write_typed_record!(
write_record_i32,
FileWriter::write_chunk_i32,
i32,
DataType::I32
);
impl_write_typed_record!(
write_record_f32,
FileWriter::write_chunk_f32,
f32,
DataType::F32
);
impl_write_typed_record!(
write_record_f64,
FileWriter::write_chunk_f64,
f64,
DataType::F64
);
// Private fill writers: write a chunk made only of the fill value of the type,
// used by `close` for the records which have not been written.
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_i8, i8, NC_FILL_I8);
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_u8, u8, NC_FILL_U8);
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_i16, i16, NC_FILL_I16);
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_i32, i32, NC_FILL_I32);
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_f32, f32, NC_FILL_F32);
impl_write_typed_chunk_nc_fill!(write_chunk_nc_fill_f64, f64, NC_FILL_F64);
/// Registers `records` as written for `var`.
///
/// The indices are merged into the first bookkeeping entry of the variable, or
/// a new entry is created when the variable has none. Always returns `Ok(())`.
fn update_written_records(
    &mut self,
    var: &'a Variable,
    records: &[usize],
) -> Result<(), WriteError> {
    let existing_entry = self
        .written_records
        .iter_mut()
        .find(|(known_var, _known_records)| var == *known_var);
    match existing_entry {
        Some((_known_var, known_records)) => {
            known_records.extend(records.iter().cloned());
        }
        None => {
            let new_records: BTreeSet<usize> = records.iter().cloned().collect();
            self.written_records.push((var, new_records));
        }
    }
    Ok(())
}
/// Writes the header at the beginning of the output and returns the number of
/// bytes written.
///
/// The header is made of the `CDF` magic bytes, the version byte, the number of
/// records, the lists of dimensions, global attributes and variables, then the
/// zero padding computed with the header definition.
///
/// Every part is written with `write_all`, so a short write of the underlying
/// stream cannot silently truncate the header.
///
/// # Errors
///
/// Returns `WriteError::HeaderNotDefined` if no header definition has been set,
/// or the converted I/O error if seeking or writing fails.
fn write_header(&mut self) -> Result<usize, WriteError> {
    let header_def: &HeaderDefinition = self
        .header_def
        .as_ref()
        .ok_or(WriteError::HeaderNotDefined)?;
    self.output_file.seek(SeekFrom::Start(0))?;
    let mut num_bytes: usize = 0;

    let magic: [u8; 4] = [b'C', b'D', b'F', header_def.version.clone() as u8];
    self.output_file.write_all(&magic)?;
    num_bytes += magic.len();

    let num_records: u32 = match header_def.data_set.unlimited_dim.as_ref() {
        // No unlimited dimension: the number of records is written as zero.
        None => 0,
        Some(unlim_dim) => {
            let num_records: usize = unlim_dim.size();
            if num_records <= (i32::MAX as usize) {
                num_records as u32
            } else {
                // Too many records to be stored in the header field.
                u32::MAX
            }
        }
    };
    let num_records_bytes: [u8; 4] = num_records.to_be_bytes();
    self.output_file.write_all(&num_records_bytes)?;
    num_bytes += num_records_bytes.len();

    num_bytes += FileWriter::write_dims_list(&mut self.output_file, &header_def.data_set.dims)?;
    num_bytes +=
        FileWriter::write_attrs_list(&mut self.output_file, &header_def.data_set.attrs)?;
    let data_set_metadata: &ComputedDataSetMetadata = &header_def.data_set_metadata;
    num_bytes +=
        FileWriter::write_vars_list(&mut self.output_file, &data_set_metadata.vars_metadata)?;

    // The whole zero padding is written at once instead of byte by byte.
    let zero_padding_size: usize = data_set_metadata.header_zero_padding_size;
    if zero_padding_size > 0 {
        self.output_file.write_all(&vec![0_u8; zero_padding_size])?;
        num_bytes += zero_padding_size;
    }
    Ok(num_bytes)
}
/// Writes a name: its byte length as a big-endian `i32`, its bytes, then the
/// number of zero bytes reported by `compute_padding_size`.
///
/// Returns the number of bytes written. `write_all` is used so that a short
/// write of the stream cannot truncate the name.
fn write_name_string<T: Write>(
    out_stream: &mut T,
    name: &str,
) -> Result<usize, std::io::Error> {
    let name_bytes: &[u8] = name.as_bytes();
    let zero_padding_size: usize = compute_padding_size(name_bytes.len());
    let length_bytes: [u8; 4] = (name_bytes.len() as i32).to_be_bytes();
    out_stream.write_all(&length_bytes)?;
    out_stream.write_all(name_bytes)?;
    if zero_padding_size > 0 {
        out_stream.write_all(&vec![0_u8; zero_padding_size])?;
    }
    Ok(length_bytes.len() + name_bytes.len() + zero_padding_size)
}
/// Writes the numeric value of `data_type` as a big-endian `i32` and returns the
/// number of bytes written.
///
/// `write_all` is used so that a short write of the stream cannot truncate the value.
fn write_data_type<T: Write>(
    out_stream: &mut T,
    data_type: DataType,
) -> Result<usize, std::io::Error> {
    let bytes: [u8; 4] = (data_type as i32).to_be_bytes();
    out_stream.write_all(&bytes)?;
    Ok(bytes.len())
}
/// Writes the list of dimensions and returns the number of bytes written.
///
/// An empty list is written as `ABSENT_TAG`; otherwise `DIMENSION_TAG` is
/// followed by the number of dimensions (big-endian `i32`) and by each dimension.
/// `write_all` is used so that a short write of the stream cannot truncate the list.
fn write_dims_list<T: Write>(
    out_stream: &mut T,
    dims_list: &[Rc<Dimension>],
) -> Result<usize, std::io::Error> {
    /// Writes the name of one dimension followed by its size (big-endian `i32`).
    fn write_dim<T: Write>(
        out_stream: &mut T,
        dim: &Rc<Dimension>,
    ) -> Result<usize, std::io::Error> {
        let mut num_bytes = FileWriter::write_name_string(out_stream, dim.name().as_ref())?;
        let dim_size: usize = match dim.size {
            // The size of an unlimited dimension is written as zero.
            DimensionSize::Unlimited(_) => 0,
            DimensionSize::Fixed(fixed_size) => fixed_size,
        };
        let bytes: [u8; 4] = (dim_size as i32).to_be_bytes();
        out_stream.write_all(&bytes)?;
        num_bytes += bytes.len();
        Ok(num_bytes)
    }

    if dims_list.is_empty() {
        out_stream.write_all(ABSENT_TAG)?;
        return Ok(ABSENT_TAG.len());
    }
    out_stream.write_all(DIMENSION_TAG)?;
    let mut num_bytes: usize = DIMENSION_TAG.len();
    let num_dims_bytes: [u8; 4] = (dims_list.len() as i32).to_be_bytes();
    out_stream.write_all(&num_dims_bytes)?;
    num_bytes += num_dims_bytes.len();
    for dim in dims_list {
        num_bytes += write_dim(out_stream, dim)?;
    }
    Ok(num_bytes)
}
/// Writes a list of attributes and returns the number of bytes written.
///
/// An empty list is written as `ABSENT_TAG`; otherwise `ATTRIBUTE_TAG` is
/// followed by the number of attributes (big-endian `i32`) and by each attribute.
/// `write_all` is used so that a short write of the stream cannot truncate the list.
fn write_attrs_list<T: Write>(
    out_stream: &mut T,
    attrs_list: &[Attribute],
) -> Result<usize, std::io::Error> {
    /// Writes one attribute: name, data type, number of elements, then the
    /// values through the chunk writer of the data type.
    fn write_attr<T: Write>(
        out_stream: &mut T,
        attr: &Attribute,
    ) -> Result<usize, std::io::Error> {
        let mut num_bytes = FileWriter::write_name_string(out_stream, &attr.name)?;
        num_bytes += FileWriter::write_data_type(out_stream, attr.data_type())?;
        let num_elements_bytes: [u8; 4] = (attr.len() as i32).to_be_bytes();
        out_stream.write_all(&num_elements_bytes)?;
        num_bytes += num_elements_bytes.len();
        num_bytes += match &attr.data {
            DataVector::I8(slice) => FileWriter::write_chunk_i8(out_stream, slice)?,
            DataVector::U8(slice) => FileWriter::write_chunk_u8(out_stream, slice)?,
            DataVector::I16(slice) => FileWriter::write_chunk_i16(out_stream, slice)?,
            DataVector::I32(slice) => FileWriter::write_chunk_i32(out_stream, slice)?,
            DataVector::F32(slice) => FileWriter::write_chunk_f32(out_stream, slice)?,
            DataVector::F64(slice) => FileWriter::write_chunk_f64(out_stream, slice)?,
        };
        Ok(num_bytes)
    }

    if attrs_list.is_empty() {
        out_stream.write_all(ABSENT_TAG)?;
        return Ok(ABSENT_TAG.len());
    }
    out_stream.write_all(ATTRIBUTE_TAG)?;
    let mut num_bytes: usize = ATTRIBUTE_TAG.len();
    let num_attrs_bytes: [u8; 4] = (attrs_list.len() as i32).to_be_bytes();
    out_stream.write_all(&num_attrs_bytes)?;
    num_bytes += num_attrs_bytes.len();
    for attr in attrs_list {
        num_bytes += write_attr(out_stream, attr)?;
    }
    Ok(num_bytes)
}
/// Writes the list of variables and returns the number of bytes written.
///
/// An empty list is written as `ABSENT_TAG`; otherwise `VARIABLE_TAG` is
/// followed by the number of variables (big-endian `i32`) and by each variable.
/// `write_all` is used so that a short write of the stream cannot truncate the list.
fn write_vars_list<T: Write>(
    out_stream: &mut T,
    vars_metadata_list: &[(&Variable, ComputedVariableMetadata)],
) -> Result<usize, WriteError> {
    /// Writes one variable: name, number of dimensions, dimension ids,
    /// attributes, data type, chunk size and begin offset.
    fn write_var<T: Write>(
        out_stream: &mut T,
        var: &Variable,
        var_metadata: &ComputedVariableMetadata,
    ) -> Result<usize, WriteError> {
        let mut num_bytes: usize = FileWriter::write_name_string(out_stream, &var.name)?;
        let num_dims_bytes: [u8; 4] = (var.num_dims() as i32).to_be_bytes();
        out_stream.write_all(&num_dims_bytes)?;
        num_bytes += num_dims_bytes.len();
        for dim_id in var_metadata.dim_ids.iter() {
            let dim_id_bytes: [u8; 4] = (*dim_id as i32).to_be_bytes();
            out_stream.write_all(&dim_id_bytes)?;
            num_bytes += dim_id_bytes.len();
        }
        num_bytes += FileWriter::write_attrs_list(out_stream, &var.attrs)?;
        num_bytes += FileWriter::write_data_type(out_stream, var.data_type.clone())?;
        let chunk_size_bytes: [u8; 4] = {
            let mut chunk_size: usize = var_metadata.chunk_size;
            // A chunk size which does not fit in an `i32` is written as `u32::MAX`.
            if chunk_size > (i32::MAX as usize) {
                chunk_size = u32::MAX as usize;
            }
            (chunk_size as u32).to_be_bytes()
        };
        out_stream.write_all(&chunk_size_bytes)?;
        num_bytes += chunk_size_bytes.len();
        // The offset takes 4 or 8 bytes depending on its variant.
        match var_metadata.begin_offset {
            Offset::I32(begin_offset) => {
                let bytes: [u8; 4] = begin_offset.to_be_bytes();
                out_stream.write_all(&bytes)?;
                num_bytes += bytes.len();
            }
            Offset::I64(begin_offset) => {
                let bytes: [u8; 8] = begin_offset.to_be_bytes();
                out_stream.write_all(&bytes)?;
                num_bytes += bytes.len();
            }
        }
        Ok(num_bytes)
    }

    if vars_metadata_list.is_empty() {
        out_stream.write_all(ABSENT_TAG)?;
        return Ok(ABSENT_TAG.len());
    }
    out_stream.write_all(VARIABLE_TAG)?;
    let mut num_bytes: usize = VARIABLE_TAG.len();
    let num_vars_bytes: [u8; 4] = (vars_metadata_list.len() as i32).to_be_bytes();
    out_stream.write_all(&num_vars_bytes)?;
    num_bytes += num_vars_bytes.len();
    for (var, var_metadata) in vars_metadata_list.iter() {
        num_bytes += write_var(out_stream, var, var_metadata)?;
    }
    Ok(num_bytes)
}
}
/// Definition of the header of the file, built by `FileWriter::set_def`.
#[derive(Debug)]
struct HeaderDefinition<'a> {
/// Data set described by the header.
data_set: &'a DataSet,
/// Version of the file format, it selects the type of the variable offsets.
version: Version,
/// Minimum size of the header requested by the caller.
header_min_size: usize,
/// Sizes and offsets computed from the three fields above.
data_set_metadata: ComputedDataSetMetadata<'a>,
}
impl<'a> HeaderDefinition<'a> {
fn new(
data_set: &'a DataSet,
version: Version,
header_min_size: usize,
) -> Result<HeaderDefinition<'a>, WriteError> {
Ok(HeaderDefinition {
data_set,
version: version.clone(),
header_min_size,
data_set_metadata: ComputedDataSetMetadata::new(data_set, version, header_min_size)?,
})
}
fn get_var_metadata(&self, var: &'a Variable) -> Result<&ComputedVariableMetadata, WriteError> {
self.data_set_metadata
.vars_metadata
.iter()
.find(|(var_2, _var_metadata): &&(&Variable, ComputedVariableMetadata)| var == *var_2)
.map(|(_var, var_metadata): &(&Variable, ComputedVariableMetadata)| var_metadata)
.ok_or(WriteError::Unexpected)
}
}
/// Sizes and offsets computed from a data set before the header is written.
#[derive(Debug)]
struct ComputedDataSetMetadata<'a> {
/// Number of bytes required by the header content, without the zero padding.
#[allow(dead_code)]
header_required_size: usize,
/// Number of zero bytes written after the list of variables.
header_zero_padding_size: usize,
/// Metadata of each variable, in the order of the variables of the data set.
vars_metadata: Vec<(&'a Variable, ComputedVariableMetadata)>,
}
/// Values computed for one variable and written in its header entry.
#[derive(Debug)]
struct ComputedVariableMetadata {
/// Ids of the dimensions of the variable, as returned by `DataSet::get_var_dim_ids`.
dim_ids: Vec<usize>,
/// Value returned by `Variable::chunk_size` for the variable.
chunk_size: usize,
/// Offset of the first chunk of the variable (`I32` for the classic version, `I64` otherwise).
begin_offset: Offset,
}
impl<'a> ComputedDataSetMetadata<'a> {
fn new(
data_set: &'a DataSet,
version: Version,
header_min_size: usize,
) -> Result<ComputedDataSetMetadata<'a>, WriteError> {
#[allow(clippy::type_complexity)]
let (record_vars, non_record_vars): (
Vec<(usize, &Variable)>,
Vec<(usize, &Variable)>,
) = data_set
.vars
.iter()
.enumerate() .partition(|(_var_pos, var): &(usize, &Variable)| var.is_record_var());
let partitioned_vars: Vec<(usize, &Variable)> =
non_record_vars.into_iter().chain(record_vars).collect();
let header_required_size: usize =
ComputedDataSetMetadata::compute_header_required_size(data_set, version.clone());
let header_size: usize = {
let mut header_size: usize = std::cmp::max(header_min_size, header_required_size);
header_size += compute_padding_size(header_size);
header_size
};
let mut begin_offset: usize = header_size;
let mut vars_metadata: Vec<(usize, (&Variable, ComputedVariableMetadata))> = vec![];
for (header_part_pos, var) in partitioned_vars.into_iter() {
let chunk_size: usize = var.chunk_size();
vars_metadata.push((
header_part_pos,
(
var,
ComputedVariableMetadata {
dim_ids: data_set.get_var_dim_ids(&var.name).unwrap(),
chunk_size,
begin_offset: match &version {
Version::Classic => {
let offset: i32 = i32::try_from(begin_offset)
.map_err(|_err| WriteError::ClassicVersionNotPossible)?;
Offset::I32(offset)
}
Version::Offset64Bit => Offset::I64(begin_offset as i64),
},
},
),
));
begin_offset += chunk_size;
}
vars_metadata.sort_by_key(
|(header_part_pos, (_var, _var_metadata)): &(
usize,
(&Variable, ComputedVariableMetadata),
)| *header_part_pos,
);
let vars_metadata: Vec<(&'a Variable, ComputedVariableMetadata)> =
vars_metadata.into_iter().map(|x| x.1).collect();
Ok(ComputedDataSetMetadata {
header_required_size,
header_zero_padding_size: header_size - header_required_size,
vars_metadata,
})
}
/// Computes the number of bytes required by the header of `data_set`, without
/// any trailing zero padding.
///
/// The size of a variable entry depends on `version`: the begin offset takes the
/// size of an `i32` for the classic version and of an `i64` otherwise.
fn compute_header_required_size(data_set: &'a DataSet, version: Version) -> usize {
    const I32_SIZE: usize = std::mem::size_of::<i32>();

    /// Size of a name: length field, bytes of the name and padding.
    fn name_string_size(name: &str) -> usize {
        I32_SIZE + name.len() + compute_padding_size(name.len())
    }

    /// Size of a list of attributes: tag only if empty, otherwise tag, number
    /// of attributes and every attribute entry.
    fn attrs_list_size(attrs_list: &[Attribute]) -> usize {
        if attrs_list.is_empty() {
            return ABSENT_TAG.len();
        }
        let entries_size: usize = attrs_list
            .iter()
            .map(|attr| {
                let data_size: usize = attr.len() * attr.data_type().size_of();
                // name + data type + number of elements + padded values
                name_string_size(&attr.name)
                    + I32_SIZE
                    + I32_SIZE
                    + data_size
                    + compute_padding_size(data_size)
            })
            .sum();
        ATTRIBUTE_TAG.len() + I32_SIZE + entries_size
    }

    // Magic bytes, version byte and number of records.
    let preamble_size: usize = 3 + std::mem::size_of::<u8>() + I32_SIZE;

    let dims_size: usize = if data_set.dims.is_empty() {
        ABSENT_TAG.len()
    } else {
        let mut entries_size: usize = 0;
        for dim in data_set.dims.iter() {
            // name + size of the dimension
            entries_size += name_string_size(&dim.name.borrow()) + I32_SIZE;
        }
        DIMENSION_TAG.len() + I32_SIZE + entries_size
    };

    let global_attrs_size: usize = attrs_list_size(&data_set.attrs);

    let vars_size: usize = if data_set.vars.is_empty() {
        ABSENT_TAG.len()
    } else {
        let offset_size: usize = match version {
            Version::Classic => std::mem::size_of::<i32>(),
            Version::Offset64Bit => std::mem::size_of::<i64>(),
        };
        let mut entries_size: usize = 0;
        for var in data_set.vars.iter() {
            entries_size += name_string_size(&var.name);
            // number of dimensions, then one id per dimension
            entries_size += I32_SIZE + var.num_dims() * I32_SIZE;
            entries_size += attrs_list_size(&var.attrs);
            // data type and chunk size
            entries_size += I32_SIZE + I32_SIZE;
            entries_size += offset_size;
        }
        VARIABLE_TAG.len() + I32_SIZE + entries_size
    };

    preamble_size + dims_size + global_attrs_size + vars_size
}
}