use std::path::{Path, PathBuf};
use crate::agency::Agency;
use crate::config::Config;
use crate::dataset::Dataset;
use crate::error::{Error, Result};
use crate::metadata::{DatasetMetadata, VariableMetadata};
use crate::schema::{DatasetSchema, derive_schema_plan};
use crate::validate::{Issue, IssueCollection, validate_v5_schema};
use crate::xpt::XptVersion;
use crate::xpt::v5::write::{SplitWriter, XptWriter, estimate_file_size_gb};
/// Collects everything needed to write a dataset as an XPT file.
///
/// Start with [`XptWriterBuilder::new`], adjust the optional settings through
/// the setter methods, then call [`XptWriterBuilder::finalize`] to derive the
/// schema and run validation.
#[derive(Debug)]
pub struct XptWriterBuilder {
    /// The data that will be written.
    dataset: Dataset,
    /// Optional agency; when set, its checks and default size limit are
    /// applied by `finalize`.
    agency: Option<Agency>,
    /// Writer configuration; starts out as `Config::default()`.
    config: Config,
    /// Requested XPT format version; starts out as `XptVersion::V5`.
    version: XptVersion,
    /// Optional per-variable metadata handed to schema derivation.
    variable_meta: Option<Vec<VariableMetadata>>,
    /// Optional dataset-level metadata handed to schema derivation.
    dataset_meta: Option<DatasetMetadata>,
}
impl XptWriterBuilder {
#[must_use]
pub fn new(dataset: Dataset) -> Self {
Self {
dataset,
agency: None,
config: Config::default(),
version: XptVersion::V5,
variable_meta: None,
dataset_meta: None,
}
}
pub fn agency(&mut self, agency: Agency) -> &mut Self {
self.agency = Some(agency);
self
}
#[allow(dead_code)]
pub(crate) fn config(&mut self, config: Config) -> &mut Self {
self.config = config;
self
}
pub fn xpt_version(&mut self, version: XptVersion) -> &mut Self {
self.version = version;
self
}
#[allow(dead_code)]
pub(crate) fn variable_metadata(&mut self, meta: Vec<VariableMetadata>) -> &mut Self {
self.variable_meta = Some(meta);
self
}
#[allow(dead_code)]
pub(crate) fn dataset_metadata(&mut self, meta: DatasetMetadata) -> &mut Self {
self.dataset_meta = Some(meta);
self
}
#[must_use = "this returns a Result that should be handled"]
pub fn finalize(mut self) -> Result<ValidatedWrite> {
if !self.version.is_implemented() {
return Err(Error::UnsupportedVersion {
version: self.version,
});
}
if let Some(agency) = self.agency
&& self.config.write.max_size_gb.is_none()
{
self.config.write.max_size_gb = Some(agency.max_file_size_gb());
}
let schema = derive_schema_plan(
&self.dataset,
self.dataset_meta.as_ref(),
self.variable_meta.as_deref(),
self.agency,
&self.config,
)?;
let mut issues = Vec::new();
issues.extend(validate_v5_schema(&schema));
if let Some(agency) = self.agency {
issues.extend(agency.validate(&schema, None));
}
if self.config.strict_checks && issues.has_errors() {
let error_messages: Vec<String> = issues.errors().map(ToString::to_string).collect();
return Err(Error::validation_failed(error_messages.join("; ")));
}
Ok(ValidatedWrite {
dataset: self.dataset,
schema,
issues,
config: self.config,
})
}
}
/// A write request that has been through schema derivation and validation.
///
/// Produced by [`XptWriterBuilder::finalize`]; consumed by
/// [`ValidatedWrite::write_path`] or [`ValidatedWrite::write_to`].
#[derive(Debug)]
pub struct ValidatedWrite {
    /// The data to be written.
    dataset: Dataset,
    /// Schema derived for `dataset`.
    schema: DatasetSchema,
    /// Issues collected during validation.
    issues: Vec<Issue>,
    /// Configuration carried over from the builder.
    config: Config,
}
impl ValidatedWrite {
    /// Returns every issue collected during validation.
    #[must_use]
    pub fn issues(&self) -> &[Issue] {
        self.issues.as_slice()
    }

    /// Reports whether the collected issues include errors.
    #[must_use]
    pub fn has_errors(&self) -> bool {
        self.issues.has_errors()
    }

    /// Reports whether the collected issues include warnings.
    #[must_use]
    pub fn has_warnings(&self) -> bool {
        self.issues.has_warnings()
    }

    /// Returns the schema derived for the dataset.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn schema(&self) -> &DatasetSchema {
        &self.schema
    }

    /// Writes the dataset to `path`.
    ///
    /// If a `max_size_gb` limit is configured and the estimated file size
    /// exceeds it, the work is handed to a `SplitWriter` and its result is
    /// returned as-is. Otherwise a single file is written and the returned
    /// vector contains just `path`.
    ///
    /// # Errors
    ///
    /// Propagates any error from creating the writer or from writing.
    #[must_use = "this returns a Result that should be handled"]
    pub fn write_path(self, path: impl AsRef<Path>) -> Result<Vec<PathBuf>> {
        let target = path.as_ref();

        // `Some(limit)` only when a limit exists and the estimate exceeds it;
        // the estimate is not computed at all when no limit is configured.
        let split_limit = self
            .config
            .write
            .max_size_gb
            .filter(|&limit| estimate_file_size_gb(&self.schema, self.dataset.nrows()) > limit);

        match split_limit {
            Some(limit) => SplitWriter::new(target, limit, self.config.write)
                .write(&self.dataset, &self.schema),
            None => {
                XptWriter::create(target, self.config.write)?
                    .write(&self.dataset, &self.schema)?;
                Ok(vec![target.to_path_buf()])
            }
        }
    }

    /// Writes the dataset to an arbitrary [`std::io::Write`] sink.
    ///
    /// This method itself performs no size check against `max_size_gb`; the
    /// write configuration is passed to the `XptWriter` unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error reported while writing.
    #[must_use = "this returns a Result that should be handled"]
    pub fn write_to<W: std::io::Write>(self, writer: W) -> Result<()> {
        XptWriter::new(writer, self.config.write).write(&self.dataset, &self.schema)?;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dataset::{Column, ColumnData};

    /// Builds an `AE` dataset whose only column is `AESEQ` with the given data.
    fn ae_dataset(aeseq: ColumnData) -> Dataset {
        Dataset::new("AE", vec![Column::new("AESEQ", aeseq)]).unwrap()
    }

    /// A plain V5 write of a small numeric dataset finalizes without errors.
    #[test]
    fn test_write_plan_basic() {
        let mut pending = XptWriterBuilder::new(ae_dataset(ColumnData::F64(vec![
            Some(1.0),
            Some(2.0),
        ])));
        pending.xpt_version(XptVersion::V5);

        let outcome = pending.finalize();
        assert!(outcome.is_ok());

        let validated = outcome.unwrap();
        assert!(!validated.has_errors());
    }

    /// Selecting an agency still lets a simple dataset finalize successfully.
    #[test]
    fn test_write_plan_with_agency() {
        let mut pending = XptWriterBuilder::new(ae_dataset(ColumnData::F64(vec![Some(1.0)])));
        pending.agency(Agency::FDA);

        let outcome = pending.finalize();
        assert!(outcome.is_ok());
    }

    /// Requesting V8 is rejected at finalize time.
    #[test]
    fn test_write_plan_v8_unsupported() {
        let empty = Dataset::new("AE", vec![]).unwrap();
        let mut pending = XptWriterBuilder::new(empty);
        pending.xpt_version(XptVersion::V8);

        let outcome = pending.finalize();
        assert!(outcome.is_err());
    }
}