Expand description
A file format footer containing a serialized vortex-file Layout.
footer.fbs:
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
include "vortex-array/array.fbs";
include "vortex-layout/layout.fbs";
// [postscript]
// NOTE(review): the size glosses below are off by one relative to each other:
// u16::MAX is 65535, so "u16::MAX - 8" is 65527 whereas 65528 is 65536 - 8, and
// 64KB is 65536 bytes rather than u16::MAX bytes. Confirm the normative limit
// against the reader/writer implementation before relying on either number.
/// The `Postscript` is guaranteed by the file format to never exceed
/// 65528 bytes (i.e., u16::MAX - 8 bytes) in length, and is immediately
/// followed by an 8-byte `EndOfFile` struct.
///
/// An initial read of a Vortex file defaults to at least 64KB (u16::MAX bytes) and therefore
/// is guaranteed to cover at least the Postscript.
///
/// The reason for a postscript at all is to ensure that the minimal, but complete, set of footer
/// information can be read in two round trips. Since the DType is optional and possibly large, it
/// lives in its own segment. If the footer were arbitrary size, with a pointer to the DType
/// segment, then in the worst case we would need one round trip to read the footer length, one to
/// read the full footer and parse the DType offset, and a third to fetch the DType segment.
///
/// The segments pointed to by the postscript have inline compression and encryption specs to avoid
/// the need to fetch encryption schemes up-front.
table Postscript {
/// Segment containing the root `DType` flatbuffer.
dtype: PostscriptSegment;
/// Segment containing the root `Layout` flatbuffer (required).
// NOTE(review): documented as required, but the field carries no `(required)` attribute here.
layout: PostscriptSegment;
/// Segment containing the file-level `Statistics` flatbuffer.
statistics: PostscriptSegment;
/// Segment containing the `Footer` flatbuffer (required).
// NOTE(review): documented as required, but the field carries no `(required)` attribute here.
footer: PostscriptSegment;
}
/// A `PostscriptSegment` describes the location of a segment in the file without referencing any
/// specification objects. That is, encryption and compression are defined inline.
table PostscriptSegment {
// NOTE(review): the three locator fields below have the same names and types as the documented
// fields of `SegmentSpec` (file offset, byte length, base-2 alignment exponent); presumably
// they carry the same meaning -- confirm.
offset: uint64;
length: uint32;
alignment_exponent: uint8;
// Compression and encryption specs embedded directly in this table (rather than referenced by
// index), per the table-level description above.
_compression: CompressionSpec;
_encryption: EncryptionSpec;
}
// [postscript]
// [footer]
/// The `FileStatistics` object contains file-level statistics for the Vortex file.
table FileStatistics {
/// Statistics for each field in the root schema. If the root schema is not a struct, there will
/// be a single entry in this array.
// `ArrayStats` is not declared in this file; presumably it comes from one of the included
// schemas (`vortex-array/array.fbs`) -- confirm.
field_stats: [ArrayStats];
}
/// The `Footer` object stores dictionary-encoded configuration for segments,
/// compression schemes, encryption schemes, etc.
table Footer {
// Dictionary-encoded array specs, up to u16::MAX.
array_specs: [ArraySpec];
// Dictionary-encoded layout specs, up to u16::MAX.
layout_specs: [LayoutSpec];
// Dictionary-encoded segment specs, up to u32::MAX.
segment_specs: [SegmentSpec];
// Dictionary-encoded compression specs, up to 8 entries (i.e. a 3-bit index, 0..=7).
// NOTE(review): this limit was previously written as "u3::MAX (8)"; a 3-bit maximum value is 7,
// which addresses 8 entries. Confirm the intended bound.
compression_specs: [CompressionSpec];
// Dictionary-encoded encryption specs, up to u16::MAX.
encryption_specs: [EncryptionSpec];
}
/// An `ArraySpec` describes the type of a particular array.
///
/// These are identified by a globally unique string identifier, and looked up in the Vortex registry
/// at read-time.
table ArraySpec {
/// Globally unique string identifier of the array type; must always be present.
id: string (required);
}
/// A `LayoutSpec` describes the type of a particular layout.
///
/// These are identified by a globally unique string identifier, and looked up in the Vortex registry
/// at read-time.
table LayoutSpec {
/// Globally unique string identifier of the layout type; must always be present.
id: string (required);
}
/// A `SegmentSpec` acts as the locator for a buffer within the file.
struct SegmentSpec {
/// Offset relative to the start of the file.
offset: uint64;
/// Length in bytes of the segment.
length: uint32;
/// Base-2 exponent of the alignment of the segment.
alignment_exponent: uint8;
// These two fields are reserved for future use and act as indices
// into `Footer.compression_specs` and `Footer.encryption_specs`
// respectively. They are not used in the current version of the file format.
// NOTE(review): this comment previously named `FileLayout::compression_schemes` and
// `FileLayout::encryption_schemes`, which are not declared in this schema; the `Footer`
// fields above are the only matching arrays visible here -- confirm.
_compression: uint8;
_encryption: uint16;
}
/// Identifier of the compression scheme named by a `CompressionSpec`, stored as a `uint8`.
enum CompressionScheme: uint8 {
None = 0,
LZ4 = 1,
ZLib = 2,
ZStd = 3,
}
/// Definition of a compression scheme.
table CompressionSpec {
/// Which `CompressionScheme` this spec refers to.
scheme: CompressionScheme;
}
/// Definition of an encryption scheme.
// This table currently declares no fields. NOTE(review): presumably a placeholder, since the
// encryption index in `SegmentSpec` is described as reserved for future use -- confirm.
table EncryptionSpec {
}
// [footer]
// NOTE(review): this schema declares three root types (these two plus `Postscript` on the
// following line). The generated functions documented for this module only include root
// accessors for `Postscript` (`root_as_postscript*`, `finish_postscript_buffer`, ...), so
// confirm that declaring `FileStatistics` and `Footer` as root types here is intentional.
root_type FileStatistics;
root_type Footer;
root_type Postscript;
Structs§
- Array
Spec - An
`ArraySpec` describes the type of a particular array. - Array
Spec Args - Array
Spec Builder - Compression
Scheme - Compression
Spec - Definition of a compression scheme.
- Compression
Spec Args - Compression
Spec Builder - Encryption
Spec - Encryption
Spec Args - Encryption
Spec Builder - File
Statistics - The
`FileStatistics` object contains file-level statistics for the Vortex file. - File
Statistics Args - File
Statistics Builder - Footer
- The
`Registry` object stores dictionary-encoded configuration for segments, compression schemes, encryption schemes, etc. - Footer
Args - Footer
Builder - Layout
Spec - A
`LayoutSpec` describes the type of a particular layout. - Layout
Spec Args - Layout
Spec Builder - Postscript
- The
`Postscript` is guaranteed by the file format to never exceed 65528 bytes (i.e., u16::MAX - 8 bytes) in length, and is immediately followed by an 8-byte `EndOfFile` struct. - Postscript
Args - Postscript
Builder - Postscript
Segment - A
`PostscriptSegment` describes the location of a segment in the file without referencing any specification objects. That is, encryption and compression are defined inline. - Postscript
Segment Args - Postscript
Segment Builder - Segment
Spec - A
`SegmentSpec` acts as the locator for a buffer within the file.
Enums§
- Array
Spec Offset - Compression
Spec Offset - Encryption
Spec Offset - File
Statistics Offset - Footer
Offset - Layout
Spec Offset - Postscript
Offset - Postscript
Segment Offset
Constants§
- ENUM_
MAX_ COMPRESSION_ SCHEME Deprecated - ENUM_
MIN_ COMPRESSION_ SCHEME Deprecated - ENUM_
VALUES_ COMPRESSION_ SCHEME Deprecated
Functions§
- finish_
postscript_ buffer - finish_
size_ prefixed_ postscript_ buffer - root_
as_ postscript - Verifies that a buffer of bytes contains a
`Postscript` and returns it. Note that verification is still experimental and may not catch every error, or be maximally performant. For the previous, unchecked, behavior use `root_as_postscript_unchecked`. - root_
as_ ⚠postscript_ unchecked - Assumes, without verification, that a buffer of bytes contains a Postscript and returns it.
- root_
as_ postscript_ with_ opts - Verifies, with the given options, that a buffer of bytes
contains a
`Postscript` and returns it. Note that verification is still experimental and may not catch every error, or be maximally performant. For the previous, unchecked, behavior use `root_as_postscript_unchecked`. - size_
prefixed_ root_ as_ postscript - Verifies that a buffer of bytes contains a size prefixed
`Postscript` and returns it. Note that verification is still experimental and may not catch every error, or be maximally performant. For the previous, unchecked, behavior use `size_prefixed_root_as_postscript_unchecked`. - size_
prefixed_ ⚠root_ as_ postscript_ unchecked - Assumes, without verification, that a buffer of bytes contains a size prefixed Postscript and returns it.
- size_
prefixed_ root_ as_ postscript_ with_ opts - Verifies, with the given verifier options, that a buffer of
bytes contains a size prefixed
`Postscript` and returns it. Note that verification is still experimental and may not catch every error, or be maximally performant. For the previous, unchecked, behavior use `size_prefixed_root_as_postscript_unchecked`.