use apache_avro::Schema;
/// Builds the Avro schema for a `manifest_entry` record (format version 2).
///
/// The schema is kept as an embedded JSON document and parsed on every call.
/// It describes a record with the entry-level fields `status`, `snapshot_id`,
/// `sequence_number` and `file_sequence_number`, plus a nested `data_file`
/// record holding per-file metadata (path, format, partition tuple, counts,
/// column statistics, bounds, split offsets, equality ids and sort order).
///
/// Field ids follow the Apache Iceberg table spec for v2 manifests. In
/// particular the `equality_ids` list (field-id 135) uses element-id 136;
/// id 138 is reserved by the spec for the key of `nan_value_counts` and must
/// not be reused for the list element.
///
/// Maps are encoded as arrays of key/value records tagged with
/// `"logicalType": "map"`, because their keys are integers rather than
/// strings.
///
/// NOTE(review): the Iceberg v2 spec also defines an optional
/// `nan_value_counts` field (id 137) on `data_file`; it is not part of this
/// schema. Confirm whether that omission is intentional before adding it,
/// since it changes the record layout seen by writers.
///
/// NOTE(review): the `partition` record (`r102`) is declared with no fields,
/// which presumably only fits unpartitioned tables — verify against callers.
///
/// # Errors
///
/// Returns the error reported by [`Schema::parse_str`] if the embedded JSON
/// is not a valid Avro schema.
pub fn manifest_entry_schema_v2() -> Result<Schema, apache_avro::Error> {
    let schema_json = r#"{
"type": "record",
"name": "manifest_entry",
"fields": [
{
"name": "status",
"type": "int",
"field-id": 0,
"doc": "0=EXISTING, 1=ADDED, 2=DELETED"
},
{
"name": "snapshot_id",
"type": ["null", "long"],
"default": null,
"field-id": 1
},
{
"name": "sequence_number",
"type": ["null", "long"],
"default": null,
"field-id": 3
},
{
"name": "file_sequence_number",
"type": ["null", "long"],
"default": null,
"field-id": 4
},
{
"name": "data_file",
"type": {
"type": "record",
"name": "data_file",
"fields": [
{
"name": "content",
"type": "int",
"field-id": 134,
"doc": "0=DATA, 1=POSITION_DELETES, 2=EQUALITY_DELETES"
},
{
"name": "file_path",
"type": "string",
"field-id": 100
},
{
"name": "file_format",
"type": "string",
"field-id": 101
},
{
"name": "partition",
"type": {
"type": "record",
"name": "r102",
"fields": []
},
"field-id": 102
},
{
"name": "record_count",
"type": "long",
"field-id": 103
},
{
"name": "file_size_in_bytes",
"type": "long",
"field-id": 104
},
{
"name": "column_sizes",
"type": [
"null",
{
"type": "array",
"logicalType": "map",
"items": {
"type": "record",
"name": "k117_v118",
"fields": [
{
"name": "key",
"type": "int",
"field-id": 117
},
{
"name": "value",
"type": "long",
"field-id": 118
}
]
}
}
],
"default": null,
"field-id": 108
},
{
"name": "value_counts",
"type": [
"null",
{
"type": "array",
"logicalType": "map",
"items": {
"type": "record",
"name": "k119_v120",
"fields": [
{
"name": "key",
"type": "int",
"field-id": 119
},
{
"name": "value",
"type": "long",
"field-id": 120
}
]
}
}
],
"default": null,
"field-id": 109
},
{
"name": "null_value_counts",
"type": [
"null",
{
"type": "array",
"logicalType": "map",
"items": {
"type": "record",
"name": "k121_v122",
"fields": [
{
"name": "key",
"type": "int",
"field-id": 121
},
{
"name": "value",
"type": "long",
"field-id": 122
}
]
}
}
],
"default": null,
"field-id": 110
},
{
"name": "lower_bounds",
"type": [
"null",
{
"type": "array",
"logicalType": "map",
"items": {
"type": "record",
"name": "k126_v127",
"fields": [
{
"name": "key",
"type": "int",
"field-id": 126
},
{
"name": "value",
"type": "bytes",
"field-id": 127
}
]
}
}
],
"default": null,
"field-id": 125
},
{
"name": "upper_bounds",
"type": [
"null",
{
"type": "array",
"logicalType": "map",
"items": {
"type": "record",
"name": "k129_v130",
"fields": [
{
"name": "key",
"type": "int",
"field-id": 129
},
{
"name": "value",
"type": "bytes",
"field-id": 130
}
]
}
}
],
"default": null,
"field-id": 128
},
{
"name": "key_metadata",
"type": ["null", "bytes"],
"default": null,
"field-id": 131
},
{
"name": "split_offsets",
"type": [
"null",
{
"type": "array",
"items": "long",
"element-id": 133
}
],
"default": null,
"field-id": 132
},
{
"name": "equality_ids",
"type": [
"null",
{
"type": "array",
"items": "int",
"element-id": 136
}
],
"default": null,
"field-id": 135
},
{
"name": "sort_order_id",
"type": ["null", "int"],
"default": null,
"field-id": 140
}
]
},
"field-id": 2
}
]
}"#;
    Schema::parse_str(schema_json)
}
/// Parses and returns the Avro schema for a `manifest_file` record
/// (format version 2), i.e. one row of a manifest list.
///
/// The schema text lives in a function-local constant and is parsed anew on
/// each call. The record carries the manifest location and length, the
/// partition spec id, the content kind, sequence numbers, the adding
/// snapshot id, file/row counters for added, existing and deleted entries,
/// an optional array of per-partition-field `field_summary` records, and
/// optional `key_metadata` bytes.
///
/// NOTE(review): field names and ids line up with the Apache Iceberg v2
/// manifest list layout — confirm against the spec version the project
/// targets.
///
/// # Errors
///
/// Propagates whatever error [`Schema::parse_str`] yields when the embedded
/// JSON cannot be turned into an Avro schema.
pub fn manifest_list_schema_v2() -> Result<Schema, apache_avro::Error> {
    // Raw JSON definition of the `manifest_file` record.
    const MANIFEST_FILE_SCHEMA_JSON: &str = r#"{
"type": "record",
"name": "manifest_file",
"fields": [
{
"name": "manifest_path",
"type": "string",
"field-id": 500
},
{
"name": "manifest_length",
"type": "long",
"field-id": 501
},
{
"name": "partition_spec_id",
"type": "int",
"field-id": 502
},
{
"name": "content",
"type": "int",
"field-id": 517,
"doc": "0=DATA, 1=DELETES"
},
{
"name": "sequence_number",
"type": "long",
"field-id": 515
},
{
"name": "min_sequence_number",
"type": "long",
"field-id": 516
},
{
"name": "added_snapshot_id",
"type": "long",
"field-id": 503
},
{
"name": "added_files_count",
"type": "int",
"field-id": 504
},
{
"name": "existing_files_count",
"type": "int",
"field-id": 505
},
{
"name": "deleted_files_count",
"type": "int",
"field-id": 506
},
{
"name": "added_rows_count",
"type": "long",
"field-id": 512
},
{
"name": "existing_rows_count",
"type": "long",
"field-id": 513
},
{
"name": "deleted_rows_count",
"type": "long",
"field-id": 514
},
{
"name": "partitions",
"type": [
"null",
{
"type": "array",
"items": {
"type": "record",
"name": "field_summary",
"fields": [
{
"name": "contains_null",
"type": "boolean",
"field-id": 509
},
{
"name": "contains_nan",
"type": ["null", "boolean"],
"default": null,
"field-id": 518
},
{
"name": "lower_bound",
"type": ["null", "bytes"],
"default": null,
"field-id": 510
},
{
"name": "upper_bound",
"type": ["null", "bytes"],
"default": null,
"field-id": 511
}
]
},
"element-id": 508
}
],
"default": null,
"field-id": 507
},
{
"name": "key_metadata",
"type": ["null", "bytes"],
"default": null,
"field-id": 519
}
]
}"#;

    Schema::parse_str(MANIFEST_FILE_SCHEMA_JSON)
}