/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package lance.format.pb;
import "google/protobuf/timestamp.proto";
/*
Format:
+----------------------------------------+
| Encoded Column 0, Chunk 0 |
...
| Encoded Column M, Chunk N - 1 |
| Encoded Column M, Chunk N |
| Indices ... |
| Chunk Position (M x N x 8) |
| Manifest (Optional) |
| Metadata |
| i64: metadata position |
| MAJOR_VERSION | MINOR_VERSION | "LANC" |
+----------------------------------------+
*/
// A UUID value.
message UUID {
  // The UUID itself, encoded as 16 bytes.
  bytes uuid = 1;
}
// The manifest: a global section that is shared between all the files.
message Manifest {
  // Every field of the dataset; nested fields are included as well.
  repeated Field fields = 1;

  // The fragments that make up the dataset.
  repeated DataFragment fragments = 2;

  // The snapshot version number.
  uint64 version = 3;

  // File position of the auxiliary data for this version.
  //
  // The auxiliary data:
  // * is not inheritable between versions;
  // * is not loaded by default during a query.
  uint64 version_aux_data = 4;

  // Metadata attached to the schema.
  map<string, bytes> metadata = 5;

  // Identifies the library, and the version of it, that created a file.
  message WriterVersion {
    // Name of the library that created this file.
    string library = 1;

    // Version of the library that created this file.
    //
    // This is a string because the library cannot be assumed to follow
    // semantic versioning. When it does, the value should be a valid semver
    // string with no 'v' prefix, for example `2.0.0` or `2.0.0-rc.1`.
    string version = 2;
  }

  // Version of the writer that created this file.
  //
  // This may be used to detect whether the file could be affected by bugs
  // known to be associated with that writer.
  WriterVersion writer_version = 13;

  // File position of the index metadata, if present.
  optional uint64 index_section = 6;

  // Creation timestamp of this version, in the UTC timezone.
  google.protobuf.Timestamp timestamp = 7;

  // Optional tag for this version.
  string tag = 8;

  // Feature flags for readers.
  //
  // A bitmap of the features a reader must support in order to read the
  // table. A reader that finds a set flag it does not recognize should not
  // attempt to read the dataset.
  //
  // Known flags:
  // * 1: deletion files are present
  uint64 reader_feature_flags = 9;

  // Feature flags for writers.
  //
  // A bitmap of the features a writer must support in order to write to the
  // dataset. A writer that finds a set flag it does not recognize should not
  // attempt to write to the dataset.
  //
  // The flags are the same ones used by `reader_feature_flags`, although a
  // given flag will not always apply to both readers and writers.
  uint64 writer_feature_flags = 10;

  // The highest fragment ID that has been used so far.
  //
  // A fragment with this ID is not guaranteed to exist in the current
  // version; the ID may have been used only in previous versions.
  //
  // For a single file, this will be zero.
  //
  // NOTE(review): `DataFragment.id` is a uint64 while this field is a uint32
  // -- confirm that fragment ids always fit in 32 bits.
  uint32 max_fragment_id = 11;

  // Path to the transaction file, relative to `{root}/_transactions`.
  //
  // The file contains a serialized Transaction message representing the
  // transaction that created this version.
  //
  // May be empty if no transaction file was written.
  //
  // The path format is "{read_version}-{uuid}.txn", where {read_version} is
  // the version of the table the transaction read from and {uuid} is a
  // hyphen-separated UUID.
  string transaction_file = 12;
}  // Manifest
// Auxiliary data attached to a version.
//
// It is only loaded on demand.
message VersionAuxData {
  // Key-value metadata.
  //
  // NOTE(review): field numbers 1 and 2 are not used by this message. If they
  // belonged to fields that were removed, they should be declared `reserved`
  // -- confirm against the schema history.
  map<string, bytes> metadata = 3;
}
// Metadata that describes one index.
message IndexMetadata {
  // Unique ID of the index. It is unique across all the dataset versions.
  UUID uuid = 1;

  // The columns the index is built on.
  repeated int32 fields = 2;

  // Name of the index. Must be unique within one dataset version.
  string name = 3;

  // The version of the dataset this index was built from.
  uint64 dataset_version = 4;

  // A bitmap of the ids of the fragments included in the index.
  //
  // This may be used to determine how much of the dataset is covered by the
  // index. The same information can be retrieved by looking at the dataset at
  // `dataset_version`, but that old version may be deleted while the index is
  // still in use, so the information is stored in the index as well.
  //
  // The bitmap is stored as a 32-bit Roaring bitmap.
  bytes fragment_bitmap = 5;
}
// The index section: the list of index metadata for one dataset version.
message IndexSection {
  // Metadata of each index in this dataset version.
  repeated IndexMetadata indices = 1;
}
// A data fragment: a set of files that represent the different columns of the
// same rows.
//
// If a column exists in the schema but the related file does not exist, the
// column is treated as nulls.
message DataFragment {
  // Unique ID of this DataFragment.
  uint64 id = 1;

  // The data files that belong to this fragment.
  repeated DataFile files = 2;

  // File that indicates which rows, if any, should be considered deleted.
  DeletionFile deletion_file = 3;

  // Number of original rows in the fragment, not counting deletions.
  //
  // To compute the current number of rows, subtract
  // `deletion_file.num_deleted_rows` from this value.
  uint64 physical_rows = 4;
}
// A Lance data file.
message DataFile {
  // Path of the file, relative to the root.
  string path = 1;

  // The ids of the fields/columns stored in this file.
  //
  // An id is assigned from the column's position in the file, offset by the
  // max field id that already exists in the table (if any). So when a
  // fragment is first created with one file of N columns, the field ids are
  // 1, 2, ..., N. If a second fragment is then created with M columns, its
  // field ids are N+1, N+2, ..., N+M.
  //
  // These ids must be sorted and contiguous.
  repeated int32 fields = 2;
}  // DataFile
// A deletion file.
//
// The path of the deletion file is constructed as:
//   {root}/_deletions/{fragment_id}-{read_version}-{id}.{extension}
// where {extension} is `arrow` or `bin` depending on the type of deletion.
message DeletionFile {
  // Kind of deletion file. The kind varies with whichever representation
  // stores the deleted row ids most efficiently: ARROW_ARRAY when the deleted
  // rows are sparse, BITMAP when the deleted rows are dense.
  enum DeletionFileType {
    // The deletion file is a single Int32Array of deleted row ids, stored as
    // an Arrow IPC file with one batch and one column. Has a .arrow extension.
    ARROW_ARRAY = 0;

    // The deletion file is a Roaring Bitmap of deleted row ids. Has a .bin
    // extension.
    BITMAP = 1;
  }

  // Type of this deletion file.
  //
  // NOTE(review): DeletionFileType has no UNSPECIFIED value, so an unset
  // `file_type` reads back as ARROW_ARRAY. "No deletions" is presumably
  // signalled by leaving `DataFragment.deletion_file` unset, in which case
  // the remaining fields are missing as well -- confirm against the
  // reader/writer implementation.
  DeletionFileType file_type = 1;

  // The version of the dataset this deletion file was built from.
  uint64 read_version = 2;

  // An opaque id used to tell this file apart from others written by
  // concurrent writers.
  uint64 id = 3;

  // The number of rows that are marked as deleted.
  uint64 num_deleted_rows = 4;
}  // DeletionFile
// The metadata of one Lance file.
message Metadata {
  // Position of the manifest in the file. Zero means the manifest is stored
  // externally.
  uint64 manifest_position = 1;

  // Logical offsets of each chunk group, i.e., number of the rows in each
  // chunk.
  repeated int32 batch_offsets = 2;

  // The file position at which the page table is stored.
  //
  // A page table is a matrix of N x M x 2, where N = num_fields and
  // M = num_batches. Each cell of the table is the
  // <position:int64, length:int64> pair of one page; both values are int64.
  // The <position, length> pairs of all the pages in the same column are
  // stored contiguously.
  //
  // Every field that is a part of the file has a run in the page table. That
  // includes struct columns, whose run has length 0 because they do not store
  // any actual data.
  //
  // For example, for column 5 and batch 4:
  // ```text
  //   position = page_table[5][4][0];
  //   length = page_table[5][4][1];
  // ```
  uint64 page_table_position = 3;

  // Describes the statistics stored in the file.
  message StatisticsMetadata {
    // The schema of the statistics.
    //
    // This might be empty, meaning there are no statistics. It also might not
    // contain statistics for every field.
    repeated Field schema = 1;

    // The field ids of the statistics leaf fields.
    //
    // This plays a similar role to the `fields` field of the DataFile message.
    // Each of these field ids corresponds to a field in `schema`, and there is
    // one per column in the statistics page table.
    repeated int32 fields = 2;

    // The file position of the statistics page table.
    //
    // The page table is a matrix of N x 2, where N = the length of `fields`.
    // It has the same layout as the main page table, except that there is
    // always only one batch.
    //
    // For example, to get statistics column 5:
    // ```text
    //   position = stats_page_table[5][0];
    //   length = stats_page_table[5][1];
    // ```
    uint64 page_table_position = 3;
  }

  // The statistics metadata of this file.
  StatisticsMetadata statistics = 4;
}  // Metadata
// The encodings that are supported.
enum Encoding {
  // Invalid encoding. As the zero value, this is what an unset field reads as.
  NONE = 0;

  // Plain encoding.
  PLAIN = 1;

  // Variable-length binary encoding.
  VAR_BINARY = 2;

  // Dictionary encoding.
  DICTIONARY = 3;

  // Run-length encoding.
  RLE = 4;
}
// Dictionary metadata of a field.
message Dictionary {
  // The file offset at which the dictionary values are stored.
  // It is only valid if the encoding is DICTIONARY.
  //
  // The logical type represents the value type of the column, i.e., string
  // value.
  int64 offset = 1;

  // The length of the dictionary values.
  int64 length = 2;
}
// Field metadata for a column.
message Field {
  // Structural kind of a field. The logical types each kind may carry are
  // listed on `logical_type`.
  enum Type {
    // A field whose logical type is "struct".
    PARENT = 0;

    // A list-like field ("list", "large_list" and their ".struct" variants).
    REPEATED = 1;

    // A field that carries one of the leaf logical types.
    LEAF = 2;
  }

  // Structural kind of this field.
  Type type = 1;

  // Fully qualified name.
  string name = 2;

  // Field id.
  //
  // See the comment on `DataFile.fields` for how field ids are assigned.
  int32 id = 3;

  // Parent field id. If not set, this is a top-level column.
  //
  // NOTE(review): a proto3 int32 without `optional` has no presence, so "not
  // set" is indistinguishable from 0 on the wire -- confirm which value
  // writers actually use to mark a top-level column.
  int32 parent_id = 4;

  // Logical type; supports parameterized Arrow types.
  //
  // PARENT types will always have logical type "struct".
  //
  // REPEATED types may have logical types:
  // * "list"
  // * "large_list"
  // * "list.struct"
  // * "large_list.struct"
  // The final two are used if the list values are structs, and therefore the
  // field is both implicitly REPEATED and PARENT.
  //
  // LEAF types may have logical types:
  // * "null"
  // * "bool"
  // * "int8" / "uint8"
  // * "int16" / "uint16"
  // * "int32" / "uint32"
  // * "int64" / "uint64"
  // * "halffloat" / "float" / "double"
  // * "string" / "large_string"
  // * "binary" / "large_binary"
  // * "date32:day"
  // * "date64:ms"
  // * "decimal:128:{precision}:{scale}" / "decimal:256:{precision}:{scale}"
  // * "time:{unit}" / "timestamp:{unit}" / "duration:{unit}", where unit is
  //   "s", "ms", "us", "ns"
  // * "dict:{value_type}:{index_type}:false"
  string logical_type = 5;

  // Whether this field is nullable.
  bool nullable = 6;

  // The encoding of this field.
  Encoding encoding = 7;

  // Where the dictionary values are stored in the file (offset and length).
  // It is only valid if `encoding` is DICTIONARY.
  //
  // The logical type represents the value type of the column, i.e., string
  // value.
  Dictionary dictionary = 8;

  // Deprecated: optional extension type name. Use the `metadata` entry
  // ARROW:extension:name instead.
  string extension_name = 9 [deprecated = true];

  // Optional field metadata (e.g. extension type name/parameters).
  map<string, bytes> metadata = 10;
}