use crate::schema::DatasetSchema;
use crate::xpt::v5::constants::{NAMESTR_LEN, RECORD_LEN};
/// Sizes, in bytes, of the fixed header sections, each expressed as a whole
/// number of `RECORD_LEN`-sized records.
mod overhead {
    use super::RECORD_LEN;

    /// Library header: three records.
    pub const LIBRARY_HEADER: usize = RECORD_LEN * 3;

    /// Member header: three records.
    pub const MEMBER_HEADER: usize = RECORD_LEN * 3;

    /// NAMESTR header: one record.
    pub const NAMESTR_HEADER: usize = RECORD_LEN;

    /// Observation header: one record.
    pub const OBS_HEADER: usize = RECORD_LEN;
}
/// Estimates the total size, in bytes, of a file holding `nrows` rows laid out
/// according to `plan`.
///
/// The estimate is the sum of:
/// - the library, member, NAMESTR and observation headers from `overhead`;
/// - the NAMESTR section (`plan.variables.len() * NAMESTR_LEN` bytes), rounded
///   up to a whole number of `RECORD_LEN`-byte records;
/// - the observation data (`nrows * plan.row_len` bytes), rounded up the same way.
///
/// All arithmetic saturates, so inputs too large to represent yield
/// `usize::MAX` instead of panicking (debug builds) or wrapping (release builds).
#[must_use]
pub(crate) fn estimate_file_size(plan: &DatasetSchema, nrows: usize) -> usize {
    // Round a byte count up to the next record boundary, clamping at `usize::MAX`.
    let pad_to_record = |bytes: usize| bytes.div_ceil(RECORD_LEN).saturating_mul(RECORD_LEN);

    let namestr_bytes = plan.variables.len().saturating_mul(NAMESTR_LEN);
    let obs_bytes = nrows.saturating_mul(plan.row_len);

    let sections = [
        overhead::LIBRARY_HEADER,
        overhead::MEMBER_HEADER,
        overhead::NAMESTR_HEADER,
        pad_to_record(namestr_bytes),
        overhead::OBS_HEADER,
        pad_to_record(obs_bytes),
    ];

    sections
        .iter()
        .fold(0usize, |total, &section| total.saturating_add(section))
}
/// Same estimate as `estimate_file_size`, converted to a floating-point
/// number of 1024³-byte units.
#[must_use]
pub(crate) fn estimate_file_size_gb(plan: &DatasetSchema, nrows: usize) -> f64 {
    const BYTES_PER_UNIT: f64 = 1024.0 * 1024.0 * 1024.0;
    estimate_file_size(plan, nrows) as f64 / BYTES_PER_UNIT
}
/// Returns the largest row count whose estimated file size does not exceed
/// `max_bytes`.
///
/// The fixed overhead is the four headers from `overhead` plus the NAMESTR
/// section (`plan.variables.len() * NAMESTR_LEN` bytes rounded up to whole
/// `RECORD_LEN`-byte records).
///
/// # Returns
/// - `None` when the fixed overhead is greater than or equal to `max_bytes`.
/// - `Some(usize::MAX)` when `plan.row_len` is zero, since rows then add no bytes.
/// - `Some(n)` otherwise, where `n` rows of `plan.row_len` bytes fit in the
///   whole records remaining after the fixed overhead.
#[must_use]
pub(crate) fn max_rows_for_size(plan: &DatasetSchema, max_bytes: usize) -> Option<usize> {
    let namestr_section = plan
        .variables
        .len()
        .saturating_mul(NAMESTR_LEN)
        .div_ceil(RECORD_LEN)
        .saturating_mul(RECORD_LEN);

    let headers = overhead::LIBRARY_HEADER
        + overhead::MEMBER_HEADER
        + overhead::NAMESTR_HEADER
        + overhead::OBS_HEADER;
    let fixed_overhead = headers.saturating_add(namestr_section);

    if fixed_overhead >= max_bytes {
        return None;
    }
    let available = max_bytes - fixed_overhead;

    if plan.row_len == 0 {
        return Some(usize::MAX);
    }

    // Observation data is padded up to a whole number of records, so a trailing
    // partial record in the budget cannot hold any row data. Dropping it keeps
    // the padded file size within `max_bytes`.
    let usable = available - available % RECORD_LEN;
    Some(usable / plan.row_len)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::plan::VariableSpec;

    /// The estimate for 100 rows must exceed both the leading headers and the
    /// raw row payload.
    #[test]
    fn test_estimate_file_size() {
        let mut schema = DatasetSchema::new("AE");
        schema.variables = vec![
            VariableSpec::numeric("AESEQ"),
            VariableSpec::character("USUBJID", 20),
        ];
        schema.recalculate_positions();

        let row_count = 100;
        let estimated = estimate_file_size(&schema, row_count);
        let leading_headers = overhead::LIBRARY_HEADER + overhead::MEMBER_HEADER;

        assert!(estimated > leading_headers);
        assert!(estimated > row_count * schema.row_len);
    }

    /// A 1 MiB budget must leave room for at least one row of a
    /// single-numeric-variable dataset.
    #[test]
    fn test_max_rows_for_size() {
        let mut schema = DatasetSchema::new("AE");
        schema.variables = vec![VariableSpec::numeric("AESEQ")];
        schema.recalculate_positions();

        let budget = 1024 * 1024;
        let limit = max_rows_for_size(&schema, budget);

        assert!(matches!(limit, Some(rows) if rows > 0));
    }
}