use std::borrow::Cow;
use std::sync::Arc;
use super::{DataType, PrimitiveType, Schema, SchemaRef, StructField, StructType};
use crate::expressions::ExpressionStructPatchBuilder;
use crate::transforms::{transform_output_type, SchemaTransform};
use crate::{DeltaResult, Error};
/// Reports whether `st` lacks any field with a non-void type.
///
/// Returns `true` when every field's type equals [`DataType::VOID`]. A struct with no fields at
/// all also yields `true`, because there is no field that could be non-void.
fn has_no_non_void_fields(st: &StructType) -> bool {
    !st.fields()
        .any(|field| *field.data_type() != DataType::VOID)
}
/// Schema transform used to remove void-typed parts of a schema.
///
/// Only primitive handling is overridden here; every other kind of schema node goes through
/// whatever default behavior [`SchemaTransform`] provides.
struct StripVoidFields;

impl<'a> SchemaTransform<'a> for StripVoidFields {
    // Every visit returns an `Option<Cow<..>>`; this impl returns `None` for the void primitive.
    transform_output_type!(|'a, T| Option<Cow<'a, T>>);

    /// Returns `None` for [`PrimitiveType::Void`]; any other primitive is kept as a borrow of
    /// the input, so no allocation happens for it.
    fn transform_primitive(&mut self, ptype: &'a PrimitiveType) -> Option<Cow<'a, PrimitiveType>> {
        if *ptype == PrimitiveType::Void {
            None
        } else {
            Some(Cow::Borrowed(ptype))
        }
    }
}
/// Runs [`StripVoidFields`] over `schema` and returns the resulting schema.
///
/// - If the transform hands back a borrowed result (nothing was rewritten), the caller's
///   original `Arc` is returned as-is.
/// - If the transform built a new struct, that struct is wrapped in a fresh `Arc`.
/// - If the transform returned `None` for the top-level struct, a struct with no fields is
///   returned instead.
pub(crate) fn strip_void_from_schema(schema: SchemaRef) -> SchemaRef {
    let stripped = match StripVoidFields.transform_struct(&schema) {
        // Unchanged: reuse the existing allocation rather than cloning the schema.
        Some(Cow::Borrowed(_)) => return schema,
        Some(Cow::Owned(rebuilt)) => rebuilt,
        None => StructType::new_unchecked(Vec::<StructField>::new()),
    };
    Arc::new(stripped)
}
/// Checks `schema` with a fresh [`ValidateForWrite`] visitor, starting at the top-level struct.
///
/// # Errors
///
/// Returns whatever error the visitor produces; `Ok(())` means the visitor accepted the schema.
pub(crate) fn validate_schema_for_write(schema: &Schema) -> DeltaResult<()> {
    // Both counters start at zero: not inside any struct and not inside any Array/Map yet.
    let mut validator = ValidateForWrite {
        container_depth: 0,
        depth: 0,
    };
    validator.transform_struct(schema)
}
struct ValidateForWrite {
container_depth: usize,
depth: usize,
}
impl ValidateForWrite {
fn descend_into_container(&mut self, etype: &DataType, position: &str) -> DeltaResult<()> {
if *etype == DataType::VOID {
return Err(Error::schema(format!(
"Void type is not allowed as {position}"
)));
}
self.container_depth += 1;
let result = self.transform(etype);
self.container_depth -= 1;
result
}
}
impl<'a> SchemaTransform<'a> for ValidateForWrite {
transform_output_type!(|'a, T| DeltaResult<()>);
fn transform_struct(&mut self, stype: &'a StructType) -> DeltaResult<()> {
if has_no_non_void_fields(stype) {
return Err(Error::schema(if self.container_depth > 0 {
"A struct nested in Array or Map must contain at least one non-void field"
} else if self.depth == 0 {
"Table schema must contain at least one non-void column"
} else {
"Cannot write to a table with a struct that contains no non-void fields"
}));
}
self.depth += 1;
let result = self.recurse_into_struct(stype);
self.depth -= 1;
result
}
fn transform_struct_field(&mut self, field: &'a StructField) -> DeltaResult<()> {
if self.container_depth > 0 && *field.data_type() == DataType::VOID {
return Err(Error::schema(
"Void type is not allowed inside a struct nested in Array or Map",
));
}
self.recurse_into_struct_field(field)
}
fn transform_array_element(&mut self, etype: &'a DataType) -> DeltaResult<()> {
self.descend_into_container(etype, "an array element type")
}
fn transform_map_key(&mut self, etype: &'a DataType) -> DeltaResult<()> {
self.descend_into_container(etype, "a map key type")
}
fn transform_map_value(&mut self, etype: &'a DataType) -> DeltaResult<()> {
self.descend_into_container(etype, "a map value type")
}
}
/// Extends `patch` with a drop for every void-typed field of `st`, including void fields found
/// inside struct-typed fields at any nesting depth.
///
/// The walk starts with an empty path, so top-level void fields are dropped at the root.
pub(crate) fn add_void_stripping(
    patch: ExpressionStructPatchBuilder,
    st: &StructType,
) -> ExpressionStructPatchBuilder {
    let mut field_path = Vec::new();
    add_void_stripping_inner(patch, st, &mut field_path)
}
/// Recursive worker for `add_void_stripping`.
///
/// `path` holds the names of the struct fields entered so far to reach `st`. Each void-typed
/// field of `st` is registered on `patch` via `drop_at(path, name)`. Struct-typed fields are
/// entered recursively with their name pushed onto `path` and popped again afterwards, so `path`
/// is left exactly as it was received. Fields of any other type, including arrays and maps, are
/// not descended into.
fn add_void_stripping_inner<'a>(
    mut patch: ExpressionStructPatchBuilder,
    st: &'a StructType,
    path: &mut Vec<&'a str>,
) -> ExpressionStructPatchBuilder {
    for field in st.fields() {
        match field.data_type() {
            dtype if *dtype == DataType::VOID => {
                patch = patch.drop_at(path.iter().copied(), field.name());
            }
            DataType::Struct(inner) => {
                path.push(field.name());
                patch = add_void_stripping_inner(patch, inner, path);
                path.pop();
            }
            // Nothing to drop for any other type.
            _ => {}
        }
    }
    patch
}
#[cfg(test)]
mod tests {
use super::*;
use crate::schema::{
ArrayType, ColumnMetadataKey, DataType, MapType, MetadataValue, StructField, StructType,
};
// A field deserialized from JSON whose map value type is "void" must be rejected by write
// validation. The raw JSON literal is left unindented so its bytes stay exactly as they were.
#[test]
fn test_validator_catches_void_in_map_from_json() {
    let json = r#"{
"name": "m",
"type": {
"type": "map",
"keyType": "string",
"valueType": "void",
"valueContainsNull": true
},
"nullable": true,
"metadata": {}
}"#;
    let field: StructField = serde_json::from_str(json).unwrap();
    // First confirm deserialization really produced a map with a void value type.
    match field.data_type() {
        DataType::Map(map_type) => assert_eq!(*map_type.value_type(), DataType::VOID),
        _ => panic!("expected map type"),
    }
    let schema = StructType::new_unchecked([field]);
    assert!(validate_schema_for_write(&schema).is_err());
}
#[rstest::rstest]
#[case(
"void in array",
StructField::nullable("f", ArrayType::new(DataType::VOID, true)),
"array element type"
)]
#[case(
"void in map value",
StructField::nullable("f", MapType::new(DataType::STRING, DataType::VOID, true)),
"map value type"
)]
#[case(
"void in map key",
StructField::nullable("f", MapType::new(DataType::VOID, DataType::STRING, true)),
"map key type"
)]
#[case(
"void in array inside struct",
StructField::nullable(
"outer",
StructType::new_unchecked([
StructField::nullable("inner", ArrayType::new(DataType::VOID, true)),
])
),
"array element type"
)]
#[case(
"void in map inside array",
StructField::nullable(
"col",
ArrayType::new(MapType::new(DataType::STRING, DataType::VOID, true), true,),
),
"map value type"
)]
#[case(
"void inside struct nested in array",
StructField::nullable(
"arr",
ArrayType::new(
StructType::new_unchecked([
StructField::nullable("a", DataType::INTEGER),
StructField::nullable("b", DataType::VOID),
]),
true,
),
),
"Void type is not allowed inside"
)]
#[case(
"void inside struct nested in map value",
StructField::nullable(
"m",
MapType::new(
DataType::STRING,
StructType::new_unchecked([
StructField::nullable("a", DataType::INTEGER),
StructField::nullable("b", DataType::VOID),
]),
true,
),
),
"Void type is not allowed inside"
)]
#[case(
"void inside struct nested in map key",
StructField::nullable(
"m",
MapType::new(
StructType::new_unchecked([
StructField::nullable("a", DataType::INTEGER),
StructField::nullable("b", DataType::VOID),
]),
DataType::STRING,
true,
),
),
"Void type is not allowed inside"
)]
#[case(
"void inside struct nested in array inside array",
StructField::nullable(
"outer",
ArrayType::new(
ArrayType::new(
StructType::new_unchecked([
StructField::nullable("a", DataType::INTEGER),
StructField::nullable("b", DataType::VOID),
]),
true,
),
true,
),
),
"Void type is not allowed inside"
)]
#[case(
"void in deeply nested struct inside array",
StructField::nullable(
"arr",
ArrayType::new(
StructType::new_unchecked([
StructField::nullable("a", DataType::INTEGER),
StructField::nullable(
"b",
StructType::new_unchecked([
StructField::nullable("x", DataType::INTEGER),
StructField::nullable("y", DataType::VOID),
]),
),
]),
true,
),
),
"Void type is not allowed inside"
)]
#[case(
"void in struct inside array inside struct inside array",
StructField::nullable(
"outer",
ArrayType::new(
StructType::new_unchecked([StructField::nullable(
"inner",
ArrayType::new(
StructType::new_unchecked([StructField::nullable(
"v",
DataType::VOID,
)]),
true,
),
)]),
true,
),
),
"must contain at least one non-void field"
)]
#[case(
"empty struct nested in array",
StructField::nullable(
"arr",
ArrayType::new(
StructType::new_unchecked(Vec::<StructField>::new()),
true,
),
),
"struct nested in Array or Map must contain at least one non-void field"
)]
#[case(
"all-void struct nested in map value",
StructField::nullable(
"m",
MapType::new(
DataType::STRING,
StructType::new_unchecked([StructField::nullable(
"x",
DataType::VOID,
)]),
true,
),
),
"struct nested in Array or Map must contain at least one non-void field"
)]
fn test_void_in_complex_type_rejected(
#[case] desc: &str,
#[case] field: StructField,
#[case] expected_msg: &str,
) {
let schema = StructType::new_unchecked([field]);
let result = validate_schema_for_write(&schema);
assert!(
result.unwrap_err().to_string().contains(expected_msg),
"{desc}: expected error containing '{expected_msg}'"
);
}
// Schemas that write validation must accept: void at the top level or in a plain nested
// struct (next to a non-void field), and arrays/maps of structs that contain no void.
#[rstest::rstest]
#[case(
    "void top level ok",
    StructType::new_unchecked([
        StructField::nullable("id", DataType::INTEGER),
        StructField::nullable("void_col", DataType::VOID),
    ])
)]
#[case(
    "test no void ok",
    StructType::new_unchecked([
        StructField::nullable("id", DataType::INTEGER),
        StructField::nullable("name", DataType::STRING),
    ])
)]
#[case(
    "void in nested struct",
    StructType::new_unchecked([StructField::nullable(
        "s",
        StructType::new_unchecked([
            StructField::nullable("a", DataType::INTEGER),
            StructField::nullable("b", DataType::VOID),
        ]),
    )])
)]
#[case(
    "array of struct without void",
    StructType::new_unchecked([StructField::nullable(
        "arr",
        ArrayType::new(
            StructType::new_unchecked([
                StructField::nullable("a", DataType::INTEGER),
                StructField::nullable("b", DataType::STRING),
            ]),
            true,
        ),
    )])
)]
#[case(
    "map of struct without void",
    StructType::new_unchecked([StructField::nullable(
        "m",
        MapType::new(
            DataType::STRING,
            StructType::new_unchecked([
                StructField::nullable("a", DataType::INTEGER),
                StructField::nullable("b", DataType::STRING),
            ]),
            true,
        ),
    )])
)]
fn test_valid_schema_for_complex_types(#[case] desc: &str, #[case] schema: StructType) {
    if let Err(e) = validate_schema_for_write(&schema) {
        panic!("{desc}: unexpected validation error: {e}");
    }
}
// Schemas without containers that write validation must accept: a top-level void column,
// no void at all, and a nested struct mixing void and non-void fields.
#[rstest::rstest]
#[case(
    "with void column",
    StructType::new_unchecked([
        StructField::nullable("id", DataType::INTEGER),
        StructField::nullable("void_col", DataType::VOID),
    ])
)]
#[case(
    "no void",
    StructType::new_unchecked([
        StructField::nullable("id", DataType::INTEGER),
        StructField::nullable("name", DataType::STRING),
    ])
)]
#[case(
    "struct with mixed void",
    StructType::new_unchecked([StructField::nullable(
        "s",
        StructType::new_unchecked([
            StructField::nullable("a", DataType::INTEGER),
            StructField::nullable("b", DataType::VOID),
        ]),
    )])
)]
fn test_write_valid_schemas(#[case] desc: &str, #[case] schema: StructType) {
    match validate_schema_for_write(&schema) {
        Ok(()) => {}
        Err(e) => panic!("{desc}: unexpected validation error: {e}"),
    }
}
#[rstest::rstest]
#[case(
"all void table",
StructType::new_unchecked([
StructField::nullable("a", DataType::VOID),
StructField::nullable("b", DataType::VOID),
]),
"at least one non-void column"
)]
#[case(
"all void struct",
StructType::new_unchecked([
StructField::nullable("id", DataType::INTEGER),
StructField::nullable(
"s",
StructType::new_unchecked([
StructField::nullable("x", DataType::VOID),
StructField::nullable("y", DataType::VOID),
]),
),
]),
"contains no non-void fields"
)]
#[case(
"void in array",
StructType::new_unchecked([StructField::nullable(
"arr",
ArrayType::new(DataType::VOID, true),
)]),
"array element type"
)]
#[case(
"void in map",
StructType::new_unchecked([StructField::nullable(
"m",
MapType::new(
DataType::STRING,
DataType::VOID,
true,
),
)]),
"map value type"
)]
#[case(
"nested all void struct",
StructType::new_unchecked([
StructField::nullable("id", DataType::INTEGER),
StructField::nullable(
"outer",
StructType::new_unchecked([
StructField::nullable(
"inner",
StructType::new_unchecked([StructField::nullable("x", DataType::VOID)]),
),
]),
),
]),
"contains no non-void fields"
)]
#[case(
"empty struct at top level",
StructType::new_unchecked(Vec::<StructField>::new()),
"at least one non-void column"
)]
#[case(
"nested empty struct",
StructType::new_unchecked([
StructField::nullable("id", DataType::INTEGER),
StructField::nullable(
"s",
StructType::new_unchecked(
Vec::<StructField>::new(),
),
),
]),
"contains no non-void fields"
)]
fn test_write_rejected_schemas(
#[case] desc: &str,
#[case] schema: StructType,
#[case] expected_msg: &str,
) {
let result = validate_schema_for_write(&schema);
assert!(
result.unwrap_err().to_string().contains(expected_msg),
"{desc}: expected error containing '{expected_msg}'"
);
}
// Each case pairs an input schema with the schema expected after void stripping: a schema
// without void is unchanged, and void fields are removed at the top level and at any struct
// nesting depth while the remaining fields keep their order.
#[rstest::rstest]
#[case(
    "schema with no void is noop",
    StructType::new_unchecked([
        StructField::nullable("a", DataType::INTEGER),
        StructField::nullable("b", DataType::STRING),
    ]),
    StructType::new_unchecked([
        StructField::nullable("a", DataType::INTEGER),
        StructField::nullable("b", DataType::STRING),
    ])
)]
#[case(
    "top-level void is dropped",
    StructType::new_unchecked([
        StructField::nullable("a", DataType::INTEGER),
        StructField::nullable("v", DataType::VOID),
        StructField::nullable("b", DataType::STRING),
    ]),
    StructType::new_unchecked([
        StructField::nullable("a", DataType::INTEGER),
        StructField::nullable("b", DataType::STRING),
    ])
)]
#[case(
    "nested struct with mixed void",
    StructType::new_unchecked([StructField::nullable(
        "s",
        StructType::new_unchecked([
            StructField::nullable("a", DataType::INTEGER),
            StructField::nullable("b", DataType::VOID),
            StructField::nullable("c", DataType::STRING),
        ]),
    )]),
    StructType::new_unchecked([StructField::nullable(
        "s",
        StructType::new_unchecked([
            StructField::nullable("a", DataType::INTEGER),
            StructField::nullable("c", DataType::STRING),
        ]),
    )])
)]
#[case(
    "deeply nested void",
    StructType::new_unchecked([StructField::nullable(
        "outer",
        StructType::new_unchecked([StructField::nullable(
            "inner",
            StructType::new_unchecked([
                StructField::nullable("a", DataType::INTEGER),
                StructField::nullable("v", DataType::VOID),
            ]),
        )]),
    )]),
    StructType::new_unchecked([StructField::nullable(
        "outer",
        StructType::new_unchecked([StructField::nullable(
            "inner",
            StructType::new_unchecked([StructField::nullable("a", DataType::INTEGER)]),
        )]),
    )])
)]
fn test_strip_void_from_schema(
    #[case] desc: &str,
    #[case] input: StructType,
    #[case] expected: StructType,
) {
    let input_ref = Arc::new(input);
    let actual = strip_void_from_schema(input_ref);
    assert_eq!(&*actual, &expected, "{desc}");
}
// For an array or map that uses void as its element, key or value type, stripping must
// remove the whole container field "c" while leaving the sibling "id" field in place.
#[rstest::rstest]
#[case::array_of_void(DataType::from(ArrayType::new(DataType::VOID, true)))]
#[case::map_with_void_value(DataType::from(MapType::new(
    DataType::STRING,
    DataType::VOID,
    true
)))]
#[case::map_with_void_key(DataType::from(MapType::new(
    DataType::VOID,
    DataType::STRING,
    true
)))]
fn test_strip_drops_container_with_void(#[case] field_type: DataType) {
    let fields = [
        StructField::nullable("id", DataType::INTEGER),
        StructField::nullable("c", field_type),
    ];
    let stripped = strip_void_from_schema(Arc::new(StructType::new_unchecked(fields)));
    assert!(stripped.field("id").is_some());
    assert!(stripped.field("c").is_none());
}
// Stripping a void field out of a nested struct rebuilds that struct; the metadata attached
// to the enclosing field "s" must still be present afterwards.
#[test]
fn test_strip_preserves_metadata() {
    let inner = StructType::new_unchecked([
        StructField::nullable("a", DataType::INTEGER),
        StructField::nullable("b", DataType::VOID),
    ]);
    let mut s_field = StructField::nullable("s", inner);
    s_field.metadata.insert(
        ColumnMetadataKey::ColumnMappingPhysicalName.as_ref().into(),
        MetadataValue::String("phys_s".into()),
    );

    let stripped = strip_void_from_schema(Arc::new(StructType::new_unchecked([s_field])));

    let s_after = stripped.field("s").expect("s field present after strip");
    assert_eq!(
        s_after
            .metadata
            .get(ColumnMetadataKey::ColumnMappingPhysicalName.as_ref()),
        Some(&MetadataValue::String("phys_s".into())),
        "metadata should be preserved after stripping"
    );
}
}